xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 0d4b0b6c1a374e6007cd4b2fbb5567147f7b1605)
1 #ifndef lint
2 static char vcid[] = "$Id: baij.c,v 1.65 1996/08/08 14:43:31 bsmith Exp bsmith $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "src/mat/impls/baij/seq/baij.h"
10 #include "src/vec/vecimpl.h"
11 #include "src/inline/spops.h"
12 #include "petsc.h"
13 
14 extern   int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
15 
16 static int MatGetReordering_SeqBAIJ(Mat A,MatReordering type,IS *rperm,IS *cperm)
17 {
18   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
19   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
20 
21   /*
22      this is tacky: In the future when we have written special factorization
23      and solve routines for the identity permutation we should use a
24      stride index set instead of the general one.
25   */
26   if (type  == ORDER_NATURAL) {
27     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
28     for ( i=0; i<n; i++ ) idx[i] = i;
29     ierr = ISCreateGeneral(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
30     ierr = ISCreateGeneral(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
31     PetscFree(idx);
32     ISSetPermutation(*rperm);
33     ISSetPermutation(*cperm);
34     ISSetIdentity(*rperm);
35     ISSetIdentity(*cperm);
36     return 0;
37   }
38 
39   MatReorderingRegisterAll();
40   ishift = 0;
41   oshift = -MatReorderingIndexShift[(int)type];
42   if (MatReorderingRequiresSymmetric[(int)type]) {
43     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr);
44     ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
45     PetscFree(ia); PetscFree(ja);
46   } else {
47     if (ishift == oshift) {
48       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
49     }
50     else if (ishift == -1) {
51       /* temporarily subtract 1 from i and j indices */
52       int nz = a->i[n] - 1;
53       for ( i=0; i<nz; i++ ) a->j[i]--;
54       for ( i=0; i<n+1; i++ ) a->i[i]--;
55       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
56       for ( i=0; i<nz; i++ ) a->j[i]++;
57       for ( i=0; i<n+1; i++ ) a->i[i]++;
58     } else {
59       /* temporarily add 1 to i and j indices */
60       int nz = a->i[n] - 1;
61       for ( i=0; i<nz; i++ ) a->j[i]++;
62       for ( i=0; i<n+1; i++ ) a->i[i]++;
63       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
64       for ( i=0; i<nz; i++ ) a->j[i]--;
65       for ( i=0; i<n+1; i++ ) a->i[i]--;
66     }
67   }
68   return 0;
69 }
70 
71 /*
72      Adds diagonal pointers to sparse matrix structure.
73 */
74 
75 int MatMarkDiag_SeqBAIJ(Mat A)
76 {
77   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
78   int         i,j, *diag, m = a->mbs;
79 
80   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
81   PLogObjectMemory(A,(m+1)*sizeof(int));
82   for ( i=0; i<m; i++ ) {
83     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
84       if (a->j[j] == i) {
85         diag[i] = j;
86         break;
87       }
88     }
89   }
90   a->diag = diag;
91   return 0;
92 }
93 
94 #include "draw.h"
95 #include "pinclude/pviewer.h"
96 #include "sys.h"
97 
98 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
99 {
100   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
101   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
102   Scalar      *aa;
103 
104   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
105   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
106   col_lens[0] = MAT_COOKIE;
107   col_lens[1] = a->m;
108   col_lens[2] = a->n;
109   col_lens[3] = a->nz*bs2;
110 
111   /* store lengths of each row and write (including header) to file */
112   count = 0;
113   for ( i=0; i<a->mbs; i++ ) {
114     for ( j=0; j<bs; j++ ) {
115       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
116     }
117   }
118   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
119   PetscFree(col_lens);
120 
121   /* store column indices (zero start index) */
122   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
123   count = 0;
124   for ( i=0; i<a->mbs; i++ ) {
125     for ( j=0; j<bs; j++ ) {
126       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
127         for ( l=0; l<bs; l++ ) {
128           jj[count++] = bs*a->j[k] + l;
129         }
130       }
131     }
132   }
133   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
134   PetscFree(jj);
135 
136   /* store nonzero values */
137   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
138   count = 0;
139   for ( i=0; i<a->mbs; i++ ) {
140     for ( j=0; j<bs; j++ ) {
141       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
142         for ( l=0; l<bs; l++ ) {
143           aa[count++] = a->a[bs2*k + l*bs + j];
144         }
145       }
146     }
147   }
148   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
149   PetscFree(aa);
150   return 0;
151 }
152 
153 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
154 {
155   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
156   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
157   FILE        *fd;
158   char        *outputname;
159 
160   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
161   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
162   ierr = ViewerGetFormat(viewer,&format);
163   if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) {
164     fprintf(fd,"  block size is %d\n",bs);
165   }
166   else if (format == ASCII_FORMAT_MATLAB) {
167     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
168   }
169   else if (format == ASCII_FORMAT_COMMON) {
170     for ( i=0; i<a->mbs; i++ ) {
171       for ( j=0; j<bs; j++ ) {
172         fprintf(fd,"row %d:",i*bs+j);
173         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
174           for ( l=0; l<bs; l++ ) {
175 #if defined(PETSC_COMPLEX)
176           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
177             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
178               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
179           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
180             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
181 #else
182           if (a->a[bs2*k + l*bs + j] != 0.0)
183             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
184 #endif
185           }
186         }
187         fprintf(fd,"\n");
188       }
189     }
190   }
191   else {
192     for ( i=0; i<a->mbs; i++ ) {
193       for ( j=0; j<bs; j++ ) {
194         fprintf(fd,"row %d:",i*bs+j);
195         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
196           for ( l=0; l<bs; l++ ) {
197 #if defined(PETSC_COMPLEX)
198           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
199             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
200               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
201           }
202           else {
203             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
204           }
205 #else
206             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
207 #endif
208           }
209         }
210         fprintf(fd,"\n");
211       }
212     }
213   }
214   fflush(fd);
215   return 0;
216 }
217 
218 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
219 {
220   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
221   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
222   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
223   Scalar       *aa;
224   Draw         draw;
225   DrawButton   button;
226   PetscTruth   isnull;
227 
228   ViewerDrawGetDraw(viewer,&draw);
229   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
230 
231   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
232   xr += w;    yr += h;  xl = -w;     yl = -h;
233   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
234   /* loop over matrix elements drawing boxes */
235   color = DRAW_BLUE;
236   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
237     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
238       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
239       x_l = a->j[j]*bs; x_r = x_l + 1.0;
240       aa = a->a + j*bs2;
241       for ( k=0; k<bs; k++ ) {
242         for ( l=0; l<bs; l++ ) {
243           if (PetscReal(*aa++) >=  0.) continue;
244           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
245         }
246       }
247     }
248   }
249   color = DRAW_CYAN;
250   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
251     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
252       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
253       x_l = a->j[j]*bs; x_r = x_l + 1.0;
254       aa = a->a + j*bs2;
255       for ( k=0; k<bs; k++ ) {
256         for ( l=0; l<bs; l++ ) {
257           if (PetscReal(*aa++) != 0.) continue;
258           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
259         }
260       }
261     }
262   }
263 
264   color = DRAW_RED;
265   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
266     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
267       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
268       x_l = a->j[j]*bs; x_r = x_l + 1.0;
269       aa = a->a + j*bs2;
270       for ( k=0; k<bs; k++ ) {
271         for ( l=0; l<bs; l++ ) {
272           if (PetscReal(*aa++) <= 0.) continue;
273           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
274         }
275       }
276     }
277   }
278 
279   DrawFlush(draw);
280   DrawGetPause(draw,&pause);
281   if (pause >= 0) { PetscSleep(pause); return 0;}
282 
283   /* allow the matrix to zoom or shrink */
284   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
285   while (button != BUTTON_RIGHT) {
286     DrawClear(draw);
287     if (button == BUTTON_LEFT) scale = .5;
288     else if (button == BUTTON_CENTER) scale = 2.;
289     xl = scale*(xl + w - xc) + xc - w*scale;
290     xr = scale*(xr - w - xc) + xc + w*scale;
291     yl = scale*(yl + h - yc) + yc - h*scale;
292     yr = scale*(yr - h - yc) + yc + h*scale;
293     w *= scale; h *= scale;
294     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
295     color = DRAW_BLUE;
296     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
297       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
298         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
299         x_l = a->j[j]*bs; x_r = x_l + 1.0;
300         aa = a->a + j*bs2;
301         for ( k=0; k<bs; k++ ) {
302           for ( l=0; l<bs; l++ ) {
303             if (PetscReal(*aa++) >=  0.) continue;
304             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
305           }
306         }
307       }
308     }
309     color = DRAW_CYAN;
310     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
311       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
312         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
313         x_l = a->j[j]*bs; x_r = x_l + 1.0;
314         aa = a->a + j*bs2;
315         for ( k=0; k<bs; k++ ) {
316           for ( l=0; l<bs; l++ ) {
317           if (PetscReal(*aa++) != 0.) continue;
318           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
319           }
320         }
321       }
322     }
323 
324     color = DRAW_RED;
325     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
326       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
327         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
328         x_l = a->j[j]*bs; x_r = x_l + 1.0;
329         aa = a->a + j*bs2;
330         for ( k=0; k<bs; k++ ) {
331           for ( l=0; l<bs; l++ ) {
332             if (PetscReal(*aa++) <= 0.) continue;
333             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
334           }
335         }
336       }
337     }
338     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
339   }
340   return 0;
341 }
342 
343 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
344 {
345   Mat         A = (Mat) obj;
346   ViewerType  vtype;
347   int         ierr;
348 
349   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
350   if (vtype == MATLAB_VIEWER) {
351     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
352   }
353   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
354     return MatView_SeqBAIJ_ASCII(A,viewer);
355   }
356   else if (vtype == BINARY_FILE_VIEWER) {
357     return MatView_SeqBAIJ_Binary(A,viewer);
358   }
359   else if (vtype == DRAW_VIEWER) {
360     return MatView_SeqBAIJ_Draw(A,viewer);
361   }
362   return 0;
363 }
364 
365 #define CHUNKSIZE  10
366 
367 /* This version has row oriented v  */
368 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
369 {
370   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
371   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
372   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
373   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
374   int          ridx,cidx,bs2=a->bs2;
375   Scalar      *ap,value,*aa=a->a,*bap;
376 
377   for ( k=0; k<m; k++ ) { /* loop over added rows */
378     row  = im[k]; brow = row/bs;
379     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
380     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
381     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
382     rmax = imax[brow]; nrow = ailen[brow];
383     low = 0;
384     for ( l=0; l<n; l++ ) { /* loop over added columns */
385       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
386       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
387       col = in[l]; bcol = col/bs;
388       ridx = row % bs; cidx = col % bs;
389       if (roworiented) {
390         value = *v++;
391       }
392       else {
393         value = v[k + l*m];
394       }
395       if (!sorted) low = 0; high = nrow;
396       while (high-low > 5) {
397         t = (low+high)/2;
398         if (rp[t] > bcol) high = t;
399         else             low  = t;
400       }
401       for ( i=low; i<high; i++ ) {
402         if (rp[i] > bcol) break;
403         if (rp[i] == bcol) {
404           bap  = ap +  bs2*i + bs*cidx + ridx;
405           if (is == ADD_VALUES) *bap += value;
406           else                  *bap  = value;
407           goto noinsert;
408         }
409       }
410       if (nonew) goto noinsert;
411       if (nrow >= rmax) {
412         /* there is no extra room in row, therefore enlarge */
413         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
414         Scalar *new_a;
415 
416         /* malloc new storage space */
417         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
418         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
419         new_j   = (int *) (new_a + bs2*new_nz);
420         new_i   = new_j + new_nz;
421 
422         /* copy over old data into new slots */
423         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
424         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
425         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
426         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
427         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
428                                                            len*sizeof(int));
429         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
430         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
431         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
432                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
433         /* free up old matrix storage */
434         PetscFree(a->a);
435         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
436         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
437         a->singlemalloc = 1;
438 
439         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
440         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
441         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
442         a->maxnz += bs2*CHUNKSIZE;
443         a->reallocs++;
444         a->nz++;
445       }
446       N = nrow++ - 1;
447       /* shift up all the later entries in this row */
448       for ( ii=N; ii>=i; ii-- ) {
449         rp[ii+1] = rp[ii];
450          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
451       }
452       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
453       rp[i] = bcol;
454       ap[bs2*i + bs*cidx + ridx] = value;
455       noinsert:;
456       low = i;
457     }
458     ailen[brow] = nrow;
459   }
460   return 0;
461 }
462 
463 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
464 {
465   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
466   *m = a->m; *n = a->n;
467   return 0;
468 }
469 
470 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
471 {
472   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
473   *m = 0; *n = a->m;
474   return 0;
475 }
476 
477 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
478 {
479   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
480   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
481   Scalar      *aa,*v_i,*aa_i;
482 
483   bs  = a->bs;
484   ai  = a->i;
485   aj  = a->j;
486   aa  = a->a;
487   bs2 = a->bs2;
488 
489   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
490 
491   bn  = row/bs;   /* Block number */
492   bp  = row % bs; /* Block Position */
493   M   = ai[bn+1] - ai[bn];
494   *nz = bs*M;
495 
496   if (v) {
497     *v = 0;
498     if (*nz) {
499       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
500       for ( i=0; i<M; i++ ) { /* for each block in the block row */
501         v_i  = *v + i*bs;
502         aa_i = aa + bs2*(ai[bn] + i);
503         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
504       }
505     }
506   }
507 
508   if (idx) {
509     *idx = 0;
510     if (*nz) {
511       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
512       for ( i=0; i<M; i++ ) { /* for each block in the block row */
513         idx_i = *idx + i*bs;
514         itmp  = bs*aj[ai[bn] + i];
515         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
516       }
517     }
518   }
519   return 0;
520 }
521 
522 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
523 {
524   if (idx) {if (*idx) PetscFree(*idx);}
525   if (v)   {if (*v)   PetscFree(*v);}
526   return 0;
527 }
528 
529 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
530 {
531   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
532   Mat         C;
533   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
534   int         *rows,*cols,bs2=a->bs2;
535   Scalar      *array=a->a;
536 
537   if (B==PETSC_NULL && mbs!=nbs)
538     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
539   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
540   PetscMemzero(col,(1+nbs)*sizeof(int));
541 
542   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
543   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
544   PetscFree(col);
545   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
546   cols = rows + bs;
547   for ( i=0; i<mbs; i++ ) {
548     cols[0] = i*bs;
549     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
550     len = ai[i+1] - ai[i];
551     for ( j=0; j<len; j++ ) {
552       rows[0] = (*aj++)*bs;
553       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
554       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
555       array += bs2;
556     }
557   }
558   PetscFree(rows);
559 
560   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
561   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
562 
563   if (B != PETSC_NULL) {
564     *B = C;
565   } else {
566     /* This isn't really an in-place transpose */
567     PetscFree(a->a);
568     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
569     if (a->diag) PetscFree(a->diag);
570     if (a->ilen) PetscFree(a->ilen);
571     if (a->imax) PetscFree(a->imax);
572     if (a->solve_work) PetscFree(a->solve_work);
573     PetscFree(a);
574     PetscMemcpy(A,C,sizeof(struct _Mat));
575     PetscHeaderDestroy(C);
576   }
577   return 0;
578 }
579 
580 
581 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
582 {
583   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
584   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
585   int        m = a->m,*ip, N, *ailen = a->ilen;
586   int        mbs = a->mbs, bs2 = a->bs2;
587   Scalar     *aa = a->a, *ap;
588 
589   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
590 
591   for ( i=1; i<mbs; i++ ) {
592     /* move each row back by the amount of empty slots (fshift) before it*/
593     fshift += imax[i-1] - ailen[i-1];
594     if (fshift) {
595       ip = aj + ai[i]; ap = aa + bs2*ai[i];
596       N = ailen[i];
597       for ( j=0; j<N; j++ ) {
598         ip[j-fshift] = ip[j];
599         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
600       }
601     }
602     ai[i] = ai[i-1] + ailen[i-1];
603   }
604   if (mbs) {
605     fshift += imax[mbs-1] - ailen[mbs-1];
606     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
607   }
608   /* reset ilen and imax for each row */
609   for ( i=0; i<mbs; i++ ) {
610     ailen[i] = imax[i] = ai[i+1] - ai[i];
611   }
612   a->nz = ai[mbs];
613 
614   /* diagonals may have moved, so kill the diagonal pointers */
615   if (fshift && a->diag) {
616     PetscFree(a->diag);
617     PLogObjectMemory(A,-(m+1)*sizeof(int));
618     a->diag = 0;
619   }
620   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneed storage space %d used %d, rows %d, block size %d\n",
621            fshift*bs2,a->nz*bs2,m,a->bs);
622   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n",
623            a->reallocs);
624   return 0;
625 }
626 
627 static int MatZeroEntries_SeqBAIJ(Mat A)
628 {
629   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
630   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
631   return 0;
632 }
633 
634 int MatDestroy_SeqBAIJ(PetscObject obj)
635 {
636   Mat         A  = (Mat) obj;
637   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
638 
639 #if defined(PETSC_LOG)
640   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
641 #endif
642   PetscFree(a->a);
643   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
644   if (a->diag) PetscFree(a->diag);
645   if (a->ilen) PetscFree(a->ilen);
646   if (a->imax) PetscFree(a->imax);
647   if (a->solve_work) PetscFree(a->solve_work);
648   if (a->mult_work) PetscFree(a->mult_work);
649   PetscFree(a);
650   PLogObjectDestroy(A);
651   PetscHeaderDestroy(A);
652   return 0;
653 }
654 
655 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
656 {
657   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
658   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
659   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
660   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
661   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
662   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
663   else if (op == MAT_ROWS_SORTED ||
664            op == MAT_SYMMETRIC ||
665            op == MAT_STRUCTURALLY_SYMMETRIC ||
666            op == MAT_YES_NEW_DIAGONALS)
667     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
668   else if (op == MAT_NO_NEW_DIAGONALS)
669     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");}
670   else
671     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
672   return 0;
673 }
674 
675 
676 /* -------------------------------------------------------*/
677 /* Should check that shapes of vectors and matrices match */
678 /* -------------------------------------------------------*/
679 #include "pinclude/plapack.h"
680 
681 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
682 {
683   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
684   register Scalar *x,*z,*v,sum;
685   int             mbs=a->mbs,i,*idx,*ii,n;
686 
687   VecGetArray_Fast(xx,x);
688   VecGetArray_Fast(zz,z);
689 
690   idx   = a->j;
691   v     = a->a;
692   ii    = a->i;
693 
694   for ( i=0; i<mbs; i++ ) {
695     n    = ii[1] - ii[0]; ii++;
696     sum  = 0.0;
697     while (n--) sum += *v++ * x[*idx++];
698     z[i] = sum;
699   }
700   VecRestoreArray_Fast(xx,x);
701   VecRestoreArray_Fast(zz,z);
702   PLogFlops(2*a->nz - a->m);
703   return 0;
704 }
705 
706 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
707 {
708   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
709   register Scalar *x,*z,*v,*xb,sum1,sum2;
710   register Scalar x1,x2;
711   int             mbs=a->mbs,i,*idx,*ii;
712   int             j,n;
713 
714   VecGetArray_Fast(xx,x);
715   VecGetArray_Fast(zz,z);
716 
717   idx   = a->j;
718   v     = a->a;
719   ii    = a->i;
720 
721   for ( i=0; i<mbs; i++ ) {
722     n  = ii[1] - ii[0]; ii++;
723     sum1 = 0.0; sum2 = 0.0;
724     for ( j=0; j<n; j++ ) {
725       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
726       sum1 += v[0]*x1 + v[2]*x2;
727       sum2 += v[1]*x1 + v[3]*x2;
728       v += 4;
729     }
730     z[0] = sum1; z[1] = sum2;
731     z += 2;
732   }
733   VecRestoreArray_Fast(xx,x);
734   VecRestoreArray_Fast(zz,z);
735   PLogFlops(4*a->nz - a->m);
736   return 0;
737 }
738 
739 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
740 {
741   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
742   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
743   int             mbs=a->mbs,i,*idx,*ii,j,n;
744 
745   VecGetArray_Fast(xx,x);
746   VecGetArray_Fast(zz,z);
747 
748   idx   = a->j;
749   v     = a->a;
750   ii    = a->i;
751 
752   for ( i=0; i<mbs; i++ ) {
753     n  = ii[1] - ii[0]; ii++;
754     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
755     for ( j=0; j<n; j++ ) {
756       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
757       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
758       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
759       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
760       v += 9;
761     }
762     z[0] = sum1; z[1] = sum2; z[2] = sum3;
763     z += 3;
764   }
765   VecRestoreArray_Fast(xx,x);
766   VecRestoreArray_Fast(zz,z);
767   PLogFlops(18*a->nz - a->m);
768   return 0;
769 }
770 
771 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
772 {
773   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
774   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
775   register Scalar x1,x2,x3,x4;
776   int             mbs=a->mbs,i,*idx,*ii;
777   int             j,n;
778 
779   VecGetArray_Fast(xx,x);
780   VecGetArray_Fast(zz,z);
781 
782   idx   = a->j;
783   v     = a->a;
784   ii    = a->i;
785 
786   for ( i=0; i<mbs; i++ ) {
787     n  = ii[1] - ii[0]; ii++;
788     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
789     for ( j=0; j<n; j++ ) {
790       xb = x + 4*(*idx++);
791       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
792       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
793       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
794       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
795       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
796       v += 16;
797     }
798     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
799     z += 4;
800   }
801   VecRestoreArray_Fast(xx,x);
802   VecRestoreArray_Fast(zz,z);
803   PLogFlops(32*a->nz - a->m);
804   return 0;
805 }
806 
807 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
808 {
809   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
810   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
811   register Scalar x1,x2,x3,x4,x5;
812   int             mbs=a->mbs,i,*idx,*ii,j,n;
813 
814   VecGetArray_Fast(xx,x);
815   VecGetArray_Fast(zz,z);
816 
817   idx   = a->j;
818   v     = a->a;
819   ii    = a->i;
820 
821   for ( i=0; i<mbs; i++ ) {
822     n  = ii[1] - ii[0]; ii++;
823     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
824     for ( j=0; j<n; j++ ) {
825       xb = x + 5*(*idx++);
826       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
827       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
828       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
829       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
830       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
831       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
832       v += 25;
833     }
834     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
835     z += 5;
836   }
837   VecRestoreArray_Fast(xx,x);
838   VecRestoreArray_Fast(zz,z);
839   PLogFlops(50*a->nz - a->m);
840   return 0;
841 }
842 
843 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
844 {
845   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
846   register Scalar *x,*z,*v,*xb;
847   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
848   int             _One = 1,ncols,k;
849   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
850 
851   VecGetArray_Fast(xx,x);
852   VecGetArray_Fast(zz,z);
853 
854   idx   = a->j;
855   v     = a->a;
856   ii    = a->i;
857 
858 
859   if (!a->mult_work) {
860     k = PetscMax(a->m,a->n);
861     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
862   }
863   work = a->mult_work;
864   for ( i=0; i<mbs; i++ ) {
865     n     = ii[1] - ii[0]; ii++;
866     ncols = n*bs;
867     workt = work;
868     for ( j=0; j<n; j++ ) {
869       xb = x + bs*(*idx++);
870       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
871       workt += bs;
872     }
873     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
874     v += n*bs2;
875     z += bs;
876   }
877   VecRestoreArray_Fast(xx,x);
878   VecRestoreArray_Fast(zz,z);
879   PLogFlops(2*a->nz*bs2 - a->m);
880   return 0;
881 }
882 
883 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
884 {
885   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
886   register Scalar *x,*y,*z,*v,sum;
887   int             mbs=a->mbs,i,*idx,*ii,n;
888 
889   VecGetArray_Fast(xx,x);
890   VecGetArray_Fast(yy,y);
891   VecGetArray_Fast(zz,z);
892 
893   idx   = a->j;
894   v     = a->a;
895   ii    = a->i;
896 
897   for ( i=0; i<mbs; i++ ) {
898     n    = ii[1] - ii[0]; ii++;
899     sum  = y[i];
900     while (n--) sum += *v++ * x[*idx++];
901     z[i] = sum;
902   }
903   VecRestoreArray_Fast(xx,x);
904   VecRestoreArray_Fast(yy,y);
905   VecRestoreArray_Fast(zz,z);
906   PLogFlops(2*a->nz);
907   return 0;
908 }
909 
910 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
911 {
912   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
913   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
914   register Scalar x1,x2;
915   int             mbs=a->mbs,i,*idx,*ii;
916   int             j,n;
917 
918   VecGetArray_Fast(xx,x);
919   VecGetArray_Fast(yy,y);
920   VecGetArray_Fast(zz,z);
921 
922   idx   = a->j;
923   v     = a->a;
924   ii    = a->i;
925 
926   for ( i=0; i<mbs; i++ ) {
927     n  = ii[1] - ii[0]; ii++;
928     sum1 = y[0]; sum2 = y[1];
929     for ( j=0; j<n; j++ ) {
930       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
931       sum1 += v[0]*x1 + v[2]*x2;
932       sum2 += v[1]*x1 + v[3]*x2;
933       v += 4;
934     }
935     z[0] = sum1; z[1] = sum2;
936     z += 2; y += 2;
937   }
938   VecRestoreArray_Fast(xx,x);
939   VecRestoreArray_Fast(yy,y);
940   VecRestoreArray_Fast(zz,z);
941   PLogFlops(4*a->nz);
942   return 0;
943 }
944 
945 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
946 {
947   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
948   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
949   int             mbs=a->mbs,i,*idx,*ii,j,n;
950 
951   VecGetArray_Fast(xx,x);
952   VecGetArray_Fast(yy,y);
953   VecGetArray_Fast(zz,z);
954 
955   idx   = a->j;
956   v     = a->a;
957   ii    = a->i;
958 
959   for ( i=0; i<mbs; i++ ) {
960     n  = ii[1] - ii[0]; ii++;
961     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
962     for ( j=0; j<n; j++ ) {
963       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
964       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
965       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
966       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
967       v += 9;
968     }
969     z[0] = sum1; z[1] = sum2; z[2] = sum3;
970     z += 3; y += 3;
971   }
972   VecRestoreArray_Fast(xx,x);
973   VecRestoreArray_Fast(yy,y);
974   VecRestoreArray_Fast(zz,z);
975   PLogFlops(18*a->nz);
976   return 0;
977 }
978 
979 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
980 {
981   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
982   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
983   register Scalar x1,x2,x3,x4;
984   int             mbs=a->mbs,i,*idx,*ii;
985   int             j,n;
986 
987   VecGetArray_Fast(xx,x);
988   VecGetArray_Fast(yy,y);
989   VecGetArray_Fast(zz,z);
990 
991   idx   = a->j;
992   v     = a->a;
993   ii    = a->i;
994 
995   for ( i=0; i<mbs; i++ ) {
996     n  = ii[1] - ii[0]; ii++;
997     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
998     for ( j=0; j<n; j++ ) {
999       xb = x + 4*(*idx++);
1000       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1001       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1002       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1003       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1004       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1005       v += 16;
1006     }
1007     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1008     z += 4; y += 4;
1009   }
1010   VecRestoreArray_Fast(xx,x);
1011   VecRestoreArray_Fast(yy,y);
1012   VecRestoreArray_Fast(zz,z);
1013   PLogFlops(32*a->nz);
1014   return 0;
1015 }
1016 
1017 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1018 {
1019   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1020   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1021   register Scalar x1,x2,x3,x4,x5;
1022   int             mbs=a->mbs,i,*idx,*ii,j,n;
1023 
1024   VecGetArray_Fast(xx,x);
1025   VecGetArray_Fast(yy,y);
1026   VecGetArray_Fast(zz,z);
1027 
1028   idx   = a->j;
1029   v     = a->a;
1030   ii    = a->i;
1031 
1032   for ( i=0; i<mbs; i++ ) {
1033     n  = ii[1] - ii[0]; ii++;
1034     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1035     for ( j=0; j<n; j++ ) {
1036       xb = x + 5*(*idx++);
1037       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1038       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1039       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1040       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1041       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1042       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1043       v += 25;
1044     }
1045     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1046     z += 5; y += 5;
1047   }
1048   VecRestoreArray_Fast(xx,x);
1049   VecRestoreArray_Fast(yy,y);
1050   VecRestoreArray_Fast(zz,z);
1051   PLogFlops(50*a->nz);
1052   return 0;
1053 }
1054 
1055 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1056 {
1057   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1058   register Scalar *x,*z,*v,*xb;
1059   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1060   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1061 
1062   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1063 
1064   VecGetArray_Fast(xx,x);
1065   VecGetArray_Fast(zz,z);
1066 
1067   idx   = a->j;
1068   v     = a->a;
1069   ii    = a->i;
1070 
1071 
1072   if (!a->mult_work) {
1073     k = PetscMax(a->m,a->n);
1074     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1075   }
1076   work = a->mult_work;
1077   for ( i=0; i<mbs; i++ ) {
1078     n     = ii[1] - ii[0]; ii++;
1079     ncols = n*bs;
1080     workt = work;
1081     for ( j=0; j<n; j++ ) {
1082       xb = x + bs*(*idx++);
1083       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1084       workt += bs;
1085     }
1086     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1087     v += n*bs2;
1088     z += bs;
1089   }
1090   VecRestoreArray_Fast(xx,x);
1091   VecRestoreArray_Fast(zz,z);
1092   PLogFlops(2*a->nz*bs2 );
1093   return 0;
1094 }
1095 
1096 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1097 {
1098   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1099   Scalar          *xg,*zg,*zb;
1100   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1101   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1102   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1103 
1104 
1105   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1106   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1107   PetscMemzero(z,N*sizeof(Scalar));
1108 
1109   idx   = a->j;
1110   v     = a->a;
1111   ii    = a->i;
1112 
1113   switch (bs) {
1114   case 1:
1115     for ( i=0; i<mbs; i++ ) {
1116       n  = ii[1] - ii[0]; ii++;
1117       xb = x + i; x1 = xb[0];
1118       ib = idx + ai[i];
1119       for ( j=0; j<n; j++ ) {
1120         rval    = ib[j];
1121         z[rval] += *v++ * x1;
1122       }
1123     }
1124     break;
1125   case 2:
1126     for ( i=0; i<mbs; i++ ) {
1127       n  = ii[1] - ii[0]; ii++;
1128       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1129       ib = idx + ai[i];
1130       for ( j=0; j<n; j++ ) {
1131         rval      = ib[j]*2;
1132         z[rval++] += v[0]*x1 + v[1]*x2;
1133         z[rval++] += v[2]*x1 + v[3]*x2;
1134         v += 4;
1135       }
1136     }
1137     break;
1138   case 3:
1139     for ( i=0; i<mbs; i++ ) {
1140       n  = ii[1] - ii[0]; ii++;
1141       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1142       ib = idx + ai[i];
1143       for ( j=0; j<n; j++ ) {
1144         rval      = ib[j]*3;
1145         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1146         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1147         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1148         v += 9;
1149       }
1150     }
1151     break;
1152   case 4:
1153     for ( i=0; i<mbs; i++ ) {
1154       n  = ii[1] - ii[0]; ii++;
1155       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1156       ib = idx + ai[i];
1157       for ( j=0; j<n; j++ ) {
1158         rval      = ib[j]*4;
1159         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1160         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1161         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1162         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1163         v += 16;
1164       }
1165     }
1166     break;
1167   case 5:
1168     for ( i=0; i<mbs; i++ ) {
1169       n  = ii[1] - ii[0]; ii++;
1170       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1171       x4 = xb[3];   x5 = xb[4];
1172       ib = idx + ai[i];
1173       for ( j=0; j<n; j++ ) {
1174         rval      = ib[j]*5;
1175         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1176         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1177         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1178         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1179         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1180         v += 25;
1181       }
1182     }
1183     break;
1184       /* block sizes larger then 5 by 5 are handled by BLAS */
1185     default: {
1186       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1187       if (!a->mult_work) {
1188         k = PetscMax(a->m,a->n);
1189         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1190         CHKPTRQ(a->mult_work);
1191       }
1192       work = a->mult_work;
1193       for ( i=0; i<mbs; i++ ) {
1194         n     = ii[1] - ii[0]; ii++;
1195         ncols = n*bs;
1196         PetscMemzero(work,ncols*sizeof(Scalar));
1197         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1198         v += n*bs2;
1199         x += bs;
1200         workt = work;
1201         for ( j=0; j<n; j++ ) {
1202           zb = z + bs*(*idx++);
1203           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1204           workt += bs;
1205         }
1206       }
1207     }
1208   }
1209   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1210   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1211   PLogFlops(2*a->nz*a->bs2 - a->n);
1212   return 0;
1213 }
1214 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1215 {
1216   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1217   Scalar          *xg,*zg,*zb;
1218   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1219   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1220   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1221 
1222 
1223 
1224   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1225   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1226 
1227   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1228   else PetscMemzero(z,N*sizeof(Scalar));
1229 
1230 
1231   idx   = a->j;
1232   v     = a->a;
1233   ii    = a->i;
1234 
1235   switch (bs) {
1236   case 1:
1237     for ( i=0; i<mbs; i++ ) {
1238       n  = ii[1] - ii[0]; ii++;
1239       xb = x + i; x1 = xb[0];
1240       ib = idx + ai[i];
1241       for ( j=0; j<n; j++ ) {
1242         rval    = ib[j];
1243         z[rval] += *v++ * x1;
1244       }
1245     }
1246     break;
1247   case 2:
1248     for ( i=0; i<mbs; i++ ) {
1249       n  = ii[1] - ii[0]; ii++;
1250       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1251       ib = idx + ai[i];
1252       for ( j=0; j<n; j++ ) {
1253         rval      = ib[j]*2;
1254         z[rval++] += v[0]*x1 + v[1]*x2;
1255         z[rval++] += v[2]*x1 + v[3]*x2;
1256         v += 4;
1257       }
1258     }
1259     break;
1260   case 3:
1261     for ( i=0; i<mbs; i++ ) {
1262       n  = ii[1] - ii[0]; ii++;
1263       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1264       ib = idx + ai[i];
1265       for ( j=0; j<n; j++ ) {
1266         rval      = ib[j]*3;
1267         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1268         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1269         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1270         v += 9;
1271       }
1272     }
1273     break;
1274   case 4:
1275     for ( i=0; i<mbs; i++ ) {
1276       n  = ii[1] - ii[0]; ii++;
1277       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1278       ib = idx + ai[i];
1279       for ( j=0; j<n; j++ ) {
1280         rval      = ib[j]*4;
1281         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1282         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1283         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1284         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1285         v += 16;
1286       }
1287     }
1288     break;
1289   case 5:
1290     for ( i=0; i<mbs; i++ ) {
1291       n  = ii[1] - ii[0]; ii++;
1292       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1293       x4 = xb[3];   x5 = xb[4];
1294       ib = idx + ai[i];
1295       for ( j=0; j<n; j++ ) {
1296         rval      = ib[j]*5;
1297         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1298         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1299         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1300         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1301         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1302         v += 25;
1303       }
1304     }
1305     break;
1306       /* block sizes larger then 5 by 5 are handled by BLAS */
1307     default: {
1308       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1309       if (!a->mult_work) {
1310         k = PetscMax(a->m,a->n);
1311         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1312         CHKPTRQ(a->mult_work);
1313       }
1314       work = a->mult_work;
1315       for ( i=0; i<mbs; i++ ) {
1316         n     = ii[1] - ii[0]; ii++;
1317         ncols = n*bs;
1318         PetscMemzero(work,ncols*sizeof(Scalar));
1319         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1320         v += n*bs2;
1321         x += bs;
1322         workt = work;
1323         for ( j=0; j<n; j++ ) {
1324           zb = z + bs*(*idx++);
1325           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1326           workt += bs;
1327         }
1328       }
1329     }
1330   }
1331   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1332   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1333   PLogFlops(2*a->nz*a->bs2);
1334   return 0;
1335 }
1336 
1337 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
1338 {
1339   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1340   if (nz)  *nz  = a->bs2*a->nz;
1341   if (nza) *nza = a->maxnz;
1342   if (mem) *mem = (int)A->mem;
1343   return 0;
1344 }
1345 
1346 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1347 {
1348   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1349 
1350   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
1351 
1352   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1353   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1354       (a->nz != b->nz)) {
1355     *flg = PETSC_FALSE; return 0;
1356   }
1357 
1358   /* if the a->i are the same */
1359   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1360     *flg = PETSC_FALSE; return 0;
1361   }
1362 
1363   /* if a->j are the same */
1364   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1365     *flg = PETSC_FALSE; return 0;
1366   }
1367 
1368   /* if a->a are the same */
1369   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1370     *flg = PETSC_FALSE; return 0;
1371   }
1372   *flg = PETSC_TRUE;
1373   return 0;
1374 
1375 }
1376 
1377 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1378 {
1379   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1380   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1381   Scalar      *x, zero = 0.0,*aa,*aa_j;
1382 
1383   bs   = a->bs;
1384   aa   = a->a;
1385   ai   = a->i;
1386   aj   = a->j;
1387   ambs = a->mbs;
1388   bs2  = a->bs2;
1389 
1390   VecSet(&zero,v);
1391   VecGetArray(v,&x); VecGetLocalSize(v,&n);
1392   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
1393   for ( i=0; i<ambs; i++ ) {
1394     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1395       if (aj[j] == i) {
1396         row  = i*bs;
1397         aa_j = aa+j*bs2;
1398         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1399         break;
1400       }
1401     }
1402   }
1403   return 0;
1404 }
1405 
1406 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1407 {
1408   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1409   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1410   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1411 
1412   ai  = a->i;
1413   aj  = a->j;
1414   aa  = a->a;
1415   m   = a->m;
1416   n   = a->n;
1417   bs  = a->bs;
1418   mbs = a->mbs;
1419   bs2 = a->bs2;
1420   if (ll) {
1421     VecGetArray(ll,&l); VecGetSize(ll,&lm);
1422     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
1423     for ( i=0; i<mbs; i++ ) { /* for each block row */
1424       M  = ai[i+1] - ai[i];
1425       li = l + i*bs;
1426       v  = aa + bs2*ai[i];
1427       for ( j=0; j<M; j++ ) { /* for each block */
1428         for ( k=0; k<bs2; k++ ) {
1429           (*v++) *= li[k%bs];
1430         }
1431       }
1432     }
1433   }
1434 
1435   if (rr) {
1436     VecGetArray(rr,&r); VecGetSize(rr,&rn);
1437     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
1438     for ( i=0; i<mbs; i++ ) { /* for each block row */
1439       M  = ai[i+1] - ai[i];
1440       v  = aa + bs2*ai[i];
1441       for ( j=0; j<M; j++ ) { /* for each block */
1442         ri = r + bs*aj[ai[i]+j];
1443         for ( k=0; k<bs; k++ ) {
1444           x = ri[k];
1445           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1446         }
1447       }
1448     }
1449   }
1450   return 0;
1451 }
1452 
1453 
1454 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1455 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1456 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1457 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1458 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1459 
1460 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1461 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1462 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1463 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1464 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1465 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1466 
1467 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1468 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1469 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1470 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1471 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1472 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1473 
1474 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1475 {
1476   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1477   Scalar      *v = a->a;
1478   double      sum = 0.0;
1479   int         i,nz=a->nz,bs2=a->bs2;
1480 
1481   if (type == NORM_FROBENIUS) {
1482     for (i=0; i< bs2*nz; i++ ) {
1483 #if defined(PETSC_COMPLEX)
1484       sum += real(conj(*v)*(*v)); v++;
1485 #else
1486       sum += (*v)*(*v); v++;
1487 #endif
1488     }
1489     *norm = sqrt(sum);
1490   }
1491   else {
1492     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
1493   }
1494   return 0;
1495 }
1496 
1497 /*
1498      note: This can only work for identity for row and col. It would
1499    be good to check this and otherwise generate an error.
1500 */
1501 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1502 {
1503   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1504   Mat         outA;
1505   int         ierr;
1506 
1507   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
1508 
1509   outA          = inA;
1510   inA->factor   = FACTOR_LU;
1511   a->row        = row;
1512   a->col        = col;
1513 
1514   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1515 
1516   if (!a->diag) {
1517     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1518   }
1519   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1520   return 0;
1521 }
1522 
1523 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1524 {
1525   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1526   int         one = 1, totalnz = a->bs2*a->nz;
1527   BLscal_( &totalnz, alpha, a->a, &one );
1528   PLogFlops(totalnz);
1529   return 0;
1530 }
1531 
1532 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1533 {
1534   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1535   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1536   int        *ai = a->i, *ailen = a->ilen;
1537   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1538   Scalar     *ap, *aa = a->a, zero = 0.0;
1539 
1540   for ( k=0; k<m; k++ ) { /* loop over rows */
1541     row  = im[k]; brow = row/bs;
1542     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
1543     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1544     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1545     nrow = ailen[brow];
1546     for ( l=0; l<n; l++ ) { /* loop over columns */
1547       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1548       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1549       col  = in[l] ;
1550       bcol = col/bs;
1551       cidx = col%bs;
1552       ridx = row%bs;
1553       high = nrow;
1554       low  = 0; /* assume unsorted */
1555       while (high-low > 5) {
1556         t = (low+high)/2;
1557         if (rp[t] > bcol) high = t;
1558         else             low  = t;
1559       }
1560       for ( i=low; i<high; i++ ) {
1561         if (rp[i] > bcol) break;
1562         if (rp[i] == bcol) {
1563           *v++ = ap[bs2*i+bs*cidx+ridx];
1564           goto finished;
1565         }
1566       }
1567       *v++ = zero;
1568       finished:;
1569     }
1570   }
1571   return 0;
1572 }
1573 
1574 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1575 {
1576   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1577   *bs = baij->bs;
1578   return 0;
1579 }
1580 
1581 /* idx should be of length atlease bs */
1582 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1583 {
1584   int i,row;
1585   row = idx[0];
1586   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1587 
1588   for ( i=1; i<bs; i++ ) {
1589     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1590   }
1591   *flg = PETSC_TRUE;
1592   return 0;
1593 }
1594 
1595 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1596 {
1597   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1598   IS          is_local;
1599   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1600   PetscTruth  flg;
1601   Scalar      zero = 0.0,*aa;
1602 
1603   /* Make a copy of the IS and  sort it */
1604   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1605   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1606   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1607   ierr = ISSort(is_local); CHKERRQ(ierr);
1608   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1609 
1610   i = 0;
1611   while (i < is_n) {
1612     if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range");
1613     flg = PETSC_FALSE;
1614     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1615     if (flg) { /* There exists a block of rows to be Zerowed */
1616       baij->ilen[rows[i]/bs] = 0;
1617       i += bs;
1618     } else { /* Zero out only the requested row */
1619       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1620       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1621       for ( j=0; j<count; j++ ) {
1622         aa[0] = zero;
1623         aa+=bs;
1624       }
1625       i++;
1626     }
1627   }
1628   if (diag) {
1629     for ( j=0; j<is_n; j++ ) {
1630       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1631     }
1632   }
1633   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1634   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1635   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1636   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1637   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1638 
1639   return 0;
1640 }
1641 int MatPrintHelp_SeqBAIJ(Mat A)
1642 {
1643   static int called = 0;
1644   MPI_Comm   comm = A->comm;
1645 
1646   if (called) return 0; else called = 1;
1647   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1648   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1649   return 0;
1650 }
1651 
1652 /* -------------------------------------------------------------------*/
1653 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1654        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1655        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1656        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1657        MatSolve_SeqBAIJ_N,0,
1658        0,0,
1659        MatLUFactor_SeqBAIJ,0,
1660        0,
1661        MatTranspose_SeqBAIJ,
1662        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1663        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1664        0,MatAssemblyEnd_SeqBAIJ,
1665        0,
1666        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
1667        MatGetReordering_SeqBAIJ,
1668        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1669        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1670        MatILUFactorSymbolic_SeqBAIJ,0,
1671        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1672        MatConvertSameType_SeqBAIJ,0,0,
1673        MatILUFactor_SeqBAIJ,0,0,
1674        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
1675        MatGetValues_SeqBAIJ,0,
1676        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1677        0,0,0,MatGetBlockSize_SeqBAIJ};
1678 
1679 /*@C
1680    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1681    compressed row) format.  For good matrix assembly performance the
1682    user should preallocate the matrix storage by setting the parameter nz
1683    (or the array nzz).  By setting these parameters accurately, performance
1684    during matrix assembly can be increased by more than a factor of 50.
1685 
1686    Input Parameters:
1687 .  comm - MPI communicator, set to MPI_COMM_SELF
1688 .  bs - size of block
1689 .  m - number of rows
1690 .  n - number of columns
1691 .  nz - number of block nonzeros per block row (same for all rows)
1692 .  nzz - array containing the number of block nonzeros in the various block rows
1693          (possibly different for each block row) or PETSC_NULL
1694 
1695    Output Parameter:
1696 .  A - the matrix
1697 
1698    Notes:
1699    The block AIJ format is fully compatible with standard Fortran 77
1700    storage.  That is, the stored row and column indices can begin at
1701    either one (as in Fortran) or zero.  See the users' manual for details.
1702 
1703    Specify the preallocated storage with either nz or nnz (not both).
1704    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1705    allocation.  For additional details, see the users manual chapter on
1706    matrices and the file $(PETSC_DIR)/Performance.
1707 
1708 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1709 @*/
1710 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1711 {
1712   Mat         B;
1713   Mat_SeqBAIJ *b;
1714   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1715 
1716   if (mbs*bs!=m || nbs*bs!=n)
1717     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1718 
1719   *A = 0;
1720   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1721   PLogObjectCreate(B);
1722   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1723   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1724   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1725   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1726   if (!flg) {
1727     switch (bs) {
1728       case 1:
1729         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1730         B->ops.solve           = MatSolve_SeqBAIJ_1;
1731         B->ops.mult            = MatMult_SeqBAIJ_1;
1732         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
1733        break;
1734       case 2:
1735         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1736         B->ops.solve           = MatSolve_SeqBAIJ_2;
1737         B->ops.mult            = MatMult_SeqBAIJ_2;
1738         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
1739         break;
1740       case 3:
1741         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1742         B->ops.solve           = MatSolve_SeqBAIJ_3;
1743         B->ops.mult            = MatMult_SeqBAIJ_3;
1744         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
1745         break;
1746       case 4:
1747         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1748         B->ops.solve           = MatSolve_SeqBAIJ_4;
1749         B->ops.mult            = MatMult_SeqBAIJ_4;
1750         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
1751         break;
1752       case 5:
1753         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1754         B->ops.solve           = MatSolve_SeqBAIJ_5;
1755         B->ops.mult            = MatMult_SeqBAIJ_5;
1756         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
1757         break;
1758     }
1759   }
1760   B->destroy          = MatDestroy_SeqBAIJ;
1761   B->view             = MatView_SeqBAIJ;
1762   B->factor           = 0;
1763   B->lupivotthreshold = 1.0;
1764   b->row              = 0;
1765   b->col              = 0;
1766   b->reallocs         = 0;
1767 
1768   b->m       = m; B->m = m; B->M = m;
1769   b->n       = n; B->n = n; B->N = n;
1770   b->mbs     = mbs;
1771   b->nbs     = nbs;
1772   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1773   if (nnz == PETSC_NULL) {
1774     if (nz == PETSC_DEFAULT) nz = 5;
1775     else if (nz <= 0)        nz = 1;
1776     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1777     nz = nz*mbs;
1778   }
1779   else {
1780     nz = 0;
1781     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1782   }
1783 
1784   /* allocate the matrix space */
1785   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1786   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1787   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1788   b->j  = (int *) (b->a + nz*bs2);
1789   PetscMemzero(b->j,nz*sizeof(int));
1790   b->i  = b->j + nz;
1791   b->singlemalloc = 1;
1792 
1793   b->i[0] = 0;
1794   for (i=1; i<mbs+1; i++) {
1795     b->i[i] = b->i[i-1] + b->imax[i-1];
1796   }
1797 
1798   /* b->ilen will count nonzeros in each block row so far. */
1799   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1800   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1801   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1802 
1803   b->bs               = bs;
1804   b->bs2              = bs2;
1805   b->mbs              = mbs;
1806   b->nz               = 0;
1807   b->maxnz            = nz*bs2;
1808   b->sorted           = 0;
1809   b->roworiented      = 1;
1810   b->nonew            = 0;
1811   b->diag             = 0;
1812   b->solve_work       = 0;
1813   b->mult_work        = 0;
1814   b->spptr            = 0;
1815   *A = B;
1816   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1817   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1818   return 0;
1819 }
1820 
1821 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1822 {
1823   Mat         C;
1824   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1825   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1826 
1827   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1828 
1829   *B = 0;
1830   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1831   PLogObjectCreate(C);
1832   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1833   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1834   C->destroy    = MatDestroy_SeqBAIJ;
1835   C->view       = MatView_SeqBAIJ;
1836   C->factor     = A->factor;
1837   c->row        = 0;
1838   c->col        = 0;
1839   C->assembled  = PETSC_TRUE;
1840 
1841   c->m = C->m   = a->m;
1842   c->n = C->n   = a->n;
1843   C->M          = a->m;
1844   C->N          = a->n;
1845 
1846   c->bs         = a->bs;
1847   c->bs2        = a->bs2;
1848   c->mbs        = a->mbs;
1849   c->nbs        = a->nbs;
1850 
1851   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1852   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1853   for ( i=0; i<mbs; i++ ) {
1854     c->imax[i] = a->imax[i];
1855     c->ilen[i] = a->ilen[i];
1856   }
1857 
1858   /* allocate the matrix space */
1859   c->singlemalloc = 1;
1860   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1861   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1862   c->j  = (int *) (c->a + nz*bs2);
1863   c->i  = c->j + nz;
1864   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1865   if (mbs > 0) {
1866     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1867     if (cpvalues == COPY_VALUES) {
1868       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1869     }
1870   }
1871 
1872   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1873   c->sorted      = a->sorted;
1874   c->roworiented = a->roworiented;
1875   c->nonew       = a->nonew;
1876 
1877   if (a->diag) {
1878     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1879     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1880     for ( i=0; i<mbs; i++ ) {
1881       c->diag[i] = a->diag[i];
1882     }
1883   }
1884   else c->diag          = 0;
1885   c->nz                 = a->nz;
1886   c->maxnz              = a->maxnz;
1887   c->solve_work         = 0;
1888   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1889   c->mult_work          = 0;
1890   *B = C;
1891   return 0;
1892 }
1893 
1894 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1895 {
1896   Mat_SeqBAIJ  *a;
1897   Mat          B;
1898   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1899   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1900   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1901   int          *masked, nmask,tmp,bs2,ishift;
1902   Scalar       *aa;
1903   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1904 
1905   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1906   bs2  = bs*bs;
1907 
1908   MPI_Comm_size(comm,&size);
1909   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1910   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1911   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1912   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1913   M = header[1]; N = header[2]; nz = header[3];
1914 
1915   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1916 
1917   /*
1918      This code adds extra rows to make sure the number of rows is
1919     divisible by the blocksize
1920   */
1921   mbs        = M/bs;
1922   extra_rows = bs - M + bs*(mbs);
1923   if (extra_rows == bs) extra_rows = 0;
1924   else                  mbs++;
1925   if (extra_rows) {
1926     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1927   }
1928 
1929   /* read in row lengths */
1930   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1931   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1932   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1933 
1934   /* read in column indices */
1935   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1936   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1937   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1938 
1939   /* loop over row lengths determining block row lengths */
1940   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1941   PetscMemzero(browlengths,mbs*sizeof(int));
1942   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1943   PetscMemzero(mask,mbs*sizeof(int));
1944   masked = mask + mbs;
1945   rowcount = 0; nzcount = 0;
1946   for ( i=0; i<mbs; i++ ) {
1947     nmask = 0;
1948     for ( j=0; j<bs; j++ ) {
1949       kmax = rowlengths[rowcount];
1950       for ( k=0; k<kmax; k++ ) {
1951         tmp = jj[nzcount++]/bs;
1952         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1953       }
1954       rowcount++;
1955     }
1956     browlengths[i] += nmask;
1957     /* zero out the mask elements we set */
1958     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1959   }
1960 
1961   /* create our matrix */
1962   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1963          CHKERRQ(ierr);
1964   B = *A;
1965   a = (Mat_SeqBAIJ *) B->data;
1966 
1967   /* set matrix "i" values */
1968   a->i[0] = 0;
1969   for ( i=1; i<= mbs; i++ ) {
1970     a->i[i]      = a->i[i-1] + browlengths[i-1];
1971     a->ilen[i-1] = browlengths[i-1];
1972   }
1973   a->nz         = 0;
1974   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1975 
1976   /* read in nonzero values */
1977   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1978   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
1979   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1980 
1981   /* set "a" and "j" values into matrix */
1982   nzcount = 0; jcount = 0;
1983   for ( i=0; i<mbs; i++ ) {
1984     nzcountb = nzcount;
1985     nmask    = 0;
1986     for ( j=0; j<bs; j++ ) {
1987       kmax = rowlengths[i*bs+j];
1988       for ( k=0; k<kmax; k++ ) {
1989         tmp = jj[nzcount++]/bs;
1990 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1991       }
1992       rowcount++;
1993     }
1994     /* sort the masked values */
1995     PetscSortInt(nmask,masked);
1996 
1997     /* set "j" values into matrix */
1998     maskcount = 1;
1999     for ( j=0; j<nmask; j++ ) {
2000       a->j[jcount++]  = masked[j];
2001       mask[masked[j]] = maskcount++;
2002     }
2003     /* set "a" values into matrix */
2004     ishift = bs2*a->i[i];
2005     for ( j=0; j<bs; j++ ) {
2006       kmax = rowlengths[i*bs+j];
2007       for ( k=0; k<kmax; k++ ) {
2008         tmp    = jj[nzcountb]/bs ;
2009         block  = mask[tmp] - 1;
2010         point  = jj[nzcountb] - bs*tmp;
2011         idx    = ishift + bs2*block + j + bs*point;
2012         a->a[idx] = aa[nzcountb++];
2013       }
2014     }
2015     /* zero out the mask elements we set */
2016     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2017   }
2018   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
2019 
2020   PetscFree(rowlengths);
2021   PetscFree(browlengths);
2022   PetscFree(aa);
2023   PetscFree(jj);
2024   PetscFree(mask);
2025 
2026   B->assembled = PETSC_TRUE;
2027 
2028   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2029   if (flg) {
2030     Viewer tviewer;
2031     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2032     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
2033     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2034     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2035   }
2036   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2037   if (flg) {
2038     Viewer tviewer;
2039     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2040     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
2041     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2042     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2043   }
2044   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2045   if (flg) {
2046     Viewer tviewer;
2047     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2048     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2049     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2050   }
2051   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2052   if (flg) {
2053     Viewer tviewer;
2054     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2055     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
2056     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2057     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2058   }
2059   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2060   if (flg) {
2061     Viewer tviewer;
2062     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
2063     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
2064     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
2065     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2066   }
2067   return 0;
2068 }
2069 
2070 
2071 
2072