xref: /petsc/src/mat/impls/baij/seq/baij.c (revision c16cb8f2de640b3af1673495cb6d811eff24116a)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.62 1996/08/01 22:21:46 balay Exp bsmith $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "src/vec/vecimpl.h"
12 #include "src/inline/spops.h"
13 #include "petsc.h"
14 
15 extern   int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatReordering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = 0;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr);
45     ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
46     PetscFree(ia); PetscFree(ja);
47   } else {
48     if (ishift == oshift) {
49       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
50     }
51     else if (ishift == -1) {
52       /* temporarily subtract 1 from i and j indices */
53       int nz = a->i[n] - 1;
54       for ( i=0; i<nz; i++ ) a->j[i]--;
55       for ( i=0; i<n+1; i++ ) a->i[i]--;
56       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
57       for ( i=0; i<nz; i++ ) a->j[i]++;
58       for ( i=0; i<n+1; i++ ) a->i[i]++;
59     } else {
60       /* temporarily add 1 to i and j indices */
61       int nz = a->i[n] - 1;
62       for ( i=0; i<nz; i++ ) a->j[i]++;
63       for ( i=0; i<n+1; i++ ) a->i[i]++;
64       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
65       for ( i=0; i<nz; i++ ) a->j[i]--;
66       for ( i=0; i<n+1; i++ ) a->i[i]--;
67     }
68   }
69   return 0;
70 }
71 
72 /*
73      Adds diagonal pointers to sparse matrix structure.
74 */
75 
76 int MatMarkDiag_SeqBAIJ(Mat A)
77 {
78   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
79   int         i,j, *diag, m = a->mbs;
80 
81   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
82   PLogObjectMemory(A,(m+1)*sizeof(int));
83   for ( i=0; i<m; i++ ) {
84     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
85       if (a->j[j] == i) {
86         diag[i] = j;
87         break;
88       }
89     }
90   }
91   a->diag = diag;
92   return 0;
93 }
94 
95 #include "draw.h"
96 #include "pinclude/pviewer.h"
97 #include "sys.h"
98 
99 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
100 {
101   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
102   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
103   Scalar      *aa;
104 
105   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
106   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
107   col_lens[0] = MAT_COOKIE;
108   col_lens[1] = a->m;
109   col_lens[2] = a->n;
110   col_lens[3] = a->nz*bs2;
111 
112   /* store lengths of each row and write (including header) to file */
113   count = 0;
114   for ( i=0; i<a->mbs; i++ ) {
115     for ( j=0; j<bs; j++ ) {
116       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
117     }
118   }
119   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
120   PetscFree(col_lens);
121 
122   /* store column indices (zero start index) */
123   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
124   count = 0;
125   for ( i=0; i<a->mbs; i++ ) {
126     for ( j=0; j<bs; j++ ) {
127       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
128         for ( l=0; l<bs; l++ ) {
129           jj[count++] = bs*a->j[k] + l;
130         }
131       }
132     }
133   }
134   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
135   PetscFree(jj);
136 
137   /* store nonzero values */
138   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
139   count = 0;
140   for ( i=0; i<a->mbs; i++ ) {
141     for ( j=0; j<bs; j++ ) {
142       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
143         for ( l=0; l<bs; l++ ) {
144           aa[count++] = a->a[bs2*k + l*bs + j];
145         }
146       }
147     }
148   }
149   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
150   PetscFree(aa);
151   return 0;
152 }
153 
154 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
155 {
156   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
157   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
158   FILE        *fd;
159   char        *outputname;
160 
161   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
162   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
163   ierr = ViewerGetFormat(viewer,&format);
164   if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) {
165     fprintf(fd,"  block size is %d\n",bs);
166   }
167   else if (format == ASCII_FORMAT_MATLAB) {
168     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
169   }
170   else if (format == ASCII_FORMAT_COMMON) {
171     for ( i=0; i<a->mbs; i++ ) {
172       for ( j=0; j<bs; j++ ) {
173         fprintf(fd,"row %d:",i*bs+j);
174         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
175           for ( l=0; l<bs; l++ ) {
176 #if defined(PETSC_COMPLEX)
177           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
178             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
179               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
180           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
181             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
182 #else
183           if (a->a[bs2*k + l*bs + j] != 0.0)
184             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
185 #endif
186           }
187         }
188         fprintf(fd,"\n");
189       }
190     }
191   }
192   else {
193     for ( i=0; i<a->mbs; i++ ) {
194       for ( j=0; j<bs; j++ ) {
195         fprintf(fd,"row %d:",i*bs+j);
196         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
197           for ( l=0; l<bs; l++ ) {
198 #if defined(PETSC_COMPLEX)
199           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
200             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
201               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
202           }
203           else {
204             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
205           }
206 #else
207             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
208 #endif
209           }
210         }
211         fprintf(fd,"\n");
212       }
213     }
214   }
215   fflush(fd);
216   return 0;
217 }
218 
219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
220 {
221   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
222   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
223   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
224   Scalar       *aa;
225   Draw         draw;
226   DrawButton   button;
227   PetscTruth   isnull;
228 
229   ViewerDrawGetDraw(viewer,&draw);
230   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
231 
232   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
233   xr += w;    yr += h;  xl = -w;     yl = -h;
234   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
235   /* loop over matrix elements drawing boxes */
236   color = DRAW_BLUE;
237   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
238     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
239       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
240       x_l = a->j[j]*bs; x_r = x_l + 1.0;
241       aa = a->a + j*bs2;
242       for ( k=0; k<bs; k++ ) {
243         for ( l=0; l<bs; l++ ) {
244           if (PetscReal(*aa++) >=  0.) continue;
245           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
246         }
247       }
248     }
249   }
250   color = DRAW_CYAN;
251   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
252     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
253       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
254       x_l = a->j[j]*bs; x_r = x_l + 1.0;
255       aa = a->a + j*bs2;
256       for ( k=0; k<bs; k++ ) {
257         for ( l=0; l<bs; l++ ) {
258           if (PetscReal(*aa++) != 0.) continue;
259           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
260         }
261       }
262     }
263   }
264 
265   color = DRAW_RED;
266   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
267     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
268       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
269       x_l = a->j[j]*bs; x_r = x_l + 1.0;
270       aa = a->a + j*bs2;
271       for ( k=0; k<bs; k++ ) {
272         for ( l=0; l<bs; l++ ) {
273           if (PetscReal(*aa++) <= 0.) continue;
274           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
275         }
276       }
277     }
278   }
279 
280   DrawFlush(draw);
281   DrawGetPause(draw,&pause);
282   if (pause >= 0) { PetscSleep(pause); return 0;}
283 
284   /* allow the matrix to zoom or shrink */
285   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
286   while (button != BUTTON_RIGHT) {
287     DrawClear(draw);
288     if (button == BUTTON_LEFT) scale = .5;
289     else if (button == BUTTON_CENTER) scale = 2.;
290     xl = scale*(xl + w - xc) + xc - w*scale;
291     xr = scale*(xr - w - xc) + xc + w*scale;
292     yl = scale*(yl + h - yc) + yc - h*scale;
293     yr = scale*(yr - h - yc) + yc + h*scale;
294     w *= scale; h *= scale;
295     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
296     color = DRAW_BLUE;
297     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
298       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
299         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
300         x_l = a->j[j]*bs; x_r = x_l + 1.0;
301         aa = a->a + j*bs2;
302         for ( k=0; k<bs; k++ ) {
303           for ( l=0; l<bs; l++ ) {
304             if (PetscReal(*aa++) >=  0.) continue;
305             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
306           }
307         }
308       }
309     }
310     color = DRAW_CYAN;
311     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
312       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
313         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
314         x_l = a->j[j]*bs; x_r = x_l + 1.0;
315         aa = a->a + j*bs2;
316         for ( k=0; k<bs; k++ ) {
317           for ( l=0; l<bs; l++ ) {
318           if (PetscReal(*aa++) != 0.) continue;
319           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
320           }
321         }
322       }
323     }
324 
325     color = DRAW_RED;
326     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
327       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
328         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
329         x_l = a->j[j]*bs; x_r = x_l + 1.0;
330         aa = a->a + j*bs2;
331         for ( k=0; k<bs; k++ ) {
332           for ( l=0; l<bs; l++ ) {
333             if (PetscReal(*aa++) <= 0.) continue;
334             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
335           }
336         }
337       }
338     }
339     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
340   }
341   return 0;
342 }
343 
344 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
345 {
346   Mat         A = (Mat) obj;
347   ViewerType  vtype;
348   int         ierr;
349 
350   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
351   if (vtype == MATLAB_VIEWER) {
352     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
353   }
354   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
355     return MatView_SeqBAIJ_ASCII(A,viewer);
356   }
357   else if (vtype == BINARY_FILE_VIEWER) {
358     return MatView_SeqBAIJ_Binary(A,viewer);
359   }
360   else if (vtype == DRAW_VIEWER) {
361     return MatView_SeqBAIJ_Draw(A,viewer);
362   }
363   return 0;
364 }
365 
366 #define CHUNKSIZE  10
367 
368 /* This version has row oriented v  */
369 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
370 {
371   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
372   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
373   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
374   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
375   int          ridx,cidx,bs2=a->bs2;
376   Scalar      *ap,value,*aa=a->a,*bap;
377 
378   for ( k=0; k<m; k++ ) { /* loop over added rows */
379     row  = im[k]; brow = row/bs;
380     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
381     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
382     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
383     rmax = imax[brow]; nrow = ailen[brow];
384     low = 0;
385     for ( l=0; l<n; l++ ) { /* loop over added columns */
386       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
387       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
388       col = in[l]; bcol = col/bs;
389       ridx = row % bs; cidx = col % bs;
390       if (roworiented) {
391         value = *v++;
392       }
393       else {
394         value = v[k + l*m];
395       }
396       if (!sorted) low = 0; high = nrow;
397       while (high-low > 5) {
398         t = (low+high)/2;
399         if (rp[t] > bcol) high = t;
400         else             low  = t;
401       }
402       for ( i=low; i<high; i++ ) {
403         if (rp[i] > bcol) break;
404         if (rp[i] == bcol) {
405           bap  = ap +  bs2*i + bs*cidx + ridx;
406           if (is == ADD_VALUES) *bap += value;
407           else                  *bap  = value;
408           goto noinsert;
409         }
410       }
411       if (nonew) goto noinsert;
412       if (nrow >= rmax) {
413         /* there is no extra room in row, therefore enlarge */
414         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
415         Scalar *new_a;
416 
417         /* malloc new storage space */
418         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
419         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
420         new_j   = (int *) (new_a + bs2*new_nz);
421         new_i   = new_j + new_nz;
422 
423         /* copy over old data into new slots */
424         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
425         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
426         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
427         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
428         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
429                                                            len*sizeof(int));
430         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
431         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
432         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
433                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
434         /* free up old matrix storage */
435         PetscFree(a->a);
436         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
437         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
438         a->singlemalloc = 1;
439 
440         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
441         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
442         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
443         a->maxnz += bs2*CHUNKSIZE;
444         a->reallocs++;
445         a->nz++;
446       }
447       N = nrow++ - 1;
448       /* shift up all the later entries in this row */
449       for ( ii=N; ii>=i; ii-- ) {
450         rp[ii+1] = rp[ii];
451          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
452       }
453       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
454       rp[i] = bcol;
455       ap[bs2*i + bs*cidx + ridx] = value;
456       noinsert:;
457       low = i;
458     }
459     ailen[brow] = nrow;
460   }
461   return 0;
462 }
463 
464 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
465 {
466   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
467   *m = a->m; *n = a->n;
468   return 0;
469 }
470 
471 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
472 {
473   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
474   *m = 0; *n = a->m;
475   return 0;
476 }
477 
478 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
479 {
480   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
481   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
482   Scalar      *aa,*v_i,*aa_i;
483 
484   bs  = a->bs;
485   ai  = a->i;
486   aj  = a->j;
487   aa  = a->a;
488   bs2 = a->bs2;
489 
490   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
491 
492   bn  = row/bs;   /* Block number */
493   bp  = row % bs; /* Block Position */
494   M   = ai[bn+1] - ai[bn];
495   *nz = bs*M;
496 
497   if (v) {
498     *v = 0;
499     if (*nz) {
500       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
501       for ( i=0; i<M; i++ ) { /* for each block in the block row */
502         v_i  = *v + i*bs;
503         aa_i = aa + bs2*(ai[bn] + i);
504         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
505       }
506     }
507   }
508 
509   if (idx) {
510     *idx = 0;
511     if (*nz) {
512       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
513       for ( i=0; i<M; i++ ) { /* for each block in the block row */
514         idx_i = *idx + i*bs;
515         itmp  = bs*aj[ai[bn] + i];
516         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
517       }
518     }
519   }
520   return 0;
521 }
522 
523 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
524 {
525   if (idx) {if (*idx) PetscFree(*idx);}
526   if (v)   {if (*v)   PetscFree(*v);}
527   return 0;
528 }
529 
530 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
531 {
532   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
533   Mat         C;
534   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
535   int         *rows,*cols,bs2=a->bs2;
536   Scalar      *array=a->a;
537 
538   if (B==PETSC_NULL && mbs!=nbs)
539     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
540   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
541   PetscMemzero(col,(1+nbs)*sizeof(int));
542 
543   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
544   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
545   PetscFree(col);
546   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
547   cols = rows + bs;
548   for ( i=0; i<mbs; i++ ) {
549     cols[0] = i*bs;
550     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
551     len = ai[i+1] - ai[i];
552     for ( j=0; j<len; j++ ) {
553       rows[0] = (*aj++)*bs;
554       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
555       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
556       array += bs2;
557     }
558   }
559   PetscFree(rows);
560 
561   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
562   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
563 
564   if (B != PETSC_NULL) {
565     *B = C;
566   } else {
567     /* This isn't really an in-place transpose */
568     PetscFree(a->a);
569     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
570     if (a->diag) PetscFree(a->diag);
571     if (a->ilen) PetscFree(a->ilen);
572     if (a->imax) PetscFree(a->imax);
573     if (a->solve_work) PetscFree(a->solve_work);
574     PetscFree(a);
575     PetscMemcpy(A,C,sizeof(struct _Mat));
576     PetscHeaderDestroy(C);
577   }
578   return 0;
579 }
580 
581 
582 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
583 {
584   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
585   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
586   int        m = a->m,*ip, N, *ailen = a->ilen;
587   int        mbs = a->mbs, bs2 = a->bs2;
588   Scalar     *aa = a->a, *ap;
589 
590   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
591 
592   for ( i=1; i<mbs; i++ ) {
593     /* move each row back by the amount of empty slots (fshift) before it*/
594     fshift += imax[i-1] - ailen[i-1];
595     if (fshift) {
596       ip = aj + ai[i]; ap = aa + bs2*ai[i];
597       N = ailen[i];
598       for ( j=0; j<N; j++ ) {
599         ip[j-fshift] = ip[j];
600         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
601       }
602     }
603     ai[i] = ai[i-1] + ailen[i-1];
604   }
605   if (mbs) {
606     fshift += imax[mbs-1] - ailen[mbs-1];
607     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
608   }
609   /* reset ilen and imax for each row */
610   for ( i=0; i<mbs; i++ ) {
611     ailen[i] = imax[i] = ai[i+1] - ai[i];
612   }
613   a->nz = ai[mbs];
614 
615   /* diagonals may have moved, so kill the diagonal pointers */
616   if (fshift && a->diag) {
617     PetscFree(a->diag);
618     PLogObjectMemory(A,-(m+1)*sizeof(int));
619     a->diag = 0;
620   }
621   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space (blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs);
622   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n",
623            a->reallocs);
624   return 0;
625 }
626 
627 static int MatZeroEntries_SeqBAIJ(Mat A)
628 {
629   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
630   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
631   return 0;
632 }
633 
634 int MatDestroy_SeqBAIJ(PetscObject obj)
635 {
636   Mat         A  = (Mat) obj;
637   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
638 
639 #if defined(PETSC_LOG)
640   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
641 #endif
642   PetscFree(a->a);
643   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
644   if (a->diag) PetscFree(a->diag);
645   if (a->ilen) PetscFree(a->ilen);
646   if (a->imax) PetscFree(a->imax);
647   if (a->solve_work) PetscFree(a->solve_work);
648   if (a->mult_work) PetscFree(a->mult_work);
649   PetscFree(a);
650   PLogObjectDestroy(A);
651   PetscHeaderDestroy(A);
652   return 0;
653 }
654 
655 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
656 {
657   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
658   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
659   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
660   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
661   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
662   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
663   else if (op == MAT_ROWS_SORTED ||
664            op == MAT_SYMMETRIC ||
665            op == MAT_STRUCTURALLY_SYMMETRIC ||
666            op == MAT_YES_NEW_DIAGONALS)
667     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
668   else if (op == MAT_NO_NEW_DIAGONALS)
669     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");}
670   else
671     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
672   return 0;
673 }
674 
675 
676 /* -------------------------------------------------------*/
677 /* Should check that shapes of vectors and matrices match */
678 /* -------------------------------------------------------*/
679 #include "pinclude/plapack.h"
680 
681 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
682 {
683   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
684   register Scalar *x,*z,*v,sum;
685   int             mbs=a->mbs,i,*idx,*ii,n;
686 
687   VecGetArray_Fast(xx,x);
688   VecGetArray_Fast(zz,z);
689 
690   idx   = a->j;
691   v     = a->a;
692   ii    = a->i;
693 
694   for ( i=0; i<mbs; i++ ) {
695     n    = ii[1] - ii[0]; ii++;
696     sum  = 0.0;
697     while (n--) sum += *v++ * x[*idx++];
698     z[i] = sum;
699   }
700   VecRestoreArray_Fast(xx,x);
701   VecRestoreArray_Fast(zz,z);
702   PLogFlops(2*a->nz - a->m);
703   return 0;
704 }
705 
706 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
707 {
708   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
709   register Scalar *x,*z,*v,*xb,sum1,sum2;
710   register Scalar x1,x2;
711   int             mbs=a->mbs,i,*idx,*ii;
712   int             j,n;
713 
714   VecGetArray_Fast(xx,x);
715   VecGetArray_Fast(zz,z);
716 
717   idx   = a->j;
718   v     = a->a;
719   ii    = a->i;
720 
721   for ( i=0; i<mbs; i++ ) {
722     n  = ii[1] - ii[0]; ii++;
723     sum1 = 0.0; sum2 = 0.0;
724     for ( j=0; j<n; j++ ) {
725       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
726       sum1 += v[0]*x1 + v[2]*x2;
727       sum2 += v[1]*x1 + v[3]*x2;
728       v += 4;
729     }
730     z[0] = sum1; z[1] = sum2;
731     z += 2;
732   }
733   VecRestoreArray_Fast(xx,x);
734   VecRestoreArray_Fast(zz,z);
735   PLogFlops(4*a->nz - a->m);
736   return 0;
737 }
738 
739 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
740 {
741   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
742   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
743   int             mbs=a->mbs,i,*idx,*ii,j,n;
744 
745   VecGetArray_Fast(xx,x);
746   VecGetArray_Fast(zz,z);
747 
748   idx   = a->j;
749   v     = a->a;
750   ii    = a->i;
751 
752   for ( i=0; i<mbs; i++ ) {
753     n  = ii[1] - ii[0]; ii++;
754     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
755     for ( j=0; j<n; j++ ) {
756       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
757       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
758       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
759       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
760       v += 9;
761     }
762     z[0] = sum1; z[1] = sum2; z[2] = sum3;
763     z += 3;
764   }
765   VecRestoreArray_Fast(xx,x);
766   VecRestoreArray_Fast(zz,z);
767   PLogFlops(18*a->nz - a->m);
768   return 0;
769 }
770 
771 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
772 {
773   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
774   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
775   register Scalar x1,x2,x3,x4;
776   int             mbs=a->mbs,i,*idx,*ii;
777   int             j,n;
778 
779   VecGetArray_Fast(xx,x);
780   VecGetArray_Fast(zz,z);
781 
782   idx   = a->j;
783   v     = a->a;
784   ii    = a->i;
785 
786   for ( i=0; i<mbs; i++ ) {
787     n  = ii[1] - ii[0]; ii++;
788     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
789     for ( j=0; j<n; j++ ) {
790       xb = x + 4*(*idx++);
791       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
792       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
793       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
794       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
795       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
796       v += 16;
797     }
798     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
799     z += 4;
800   }
801   VecRestoreArray_Fast(xx,x);
802   VecRestoreArray_Fast(zz,z);
803   PLogFlops(32*a->nz - a->m);
804   return 0;
805 }
806 
807 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
808 {
809   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
810   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
811   register Scalar x1,x2,x3,x4,x5;
812   int             mbs=a->mbs,i,*idx,*ii,j,n;
813 
814   VecGetArray_Fast(xx,x);
815   VecGetArray_Fast(zz,z);
816 
817   idx   = a->j;
818   v     = a->a;
819   ii    = a->i;
820 
821   for ( i=0; i<mbs; i++ ) {
822     n  = ii[1] - ii[0]; ii++;
823     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
824     for ( j=0; j<n; j++ ) {
825       xb = x + 5*(*idx++);
826       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
827       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
828       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
829       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
830       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
831       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
832       v += 25;
833     }
834     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
835     z += 5;
836   }
837   VecRestoreArray_Fast(xx,x);
838   VecRestoreArray_Fast(zz,z);
839   PLogFlops(50*a->nz - a->m);
840   return 0;
841 }
842 
843 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
844 {
845   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
846   register Scalar *x,*z,*v,*xb;
847   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
848   int             _One = 1,ncols,k;
849   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
850 
851   VecGetArray_Fast(xx,x);
852   VecGetArray_Fast(zz,z);
853 
854   idx   = a->j;
855   v     = a->a;
856   ii    = a->i;
857 
858 
859   if (!a->mult_work) {
860     k = PetscMax(a->m,a->n);
861     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
862   }
863   work = a->mult_work;
864   for ( i=0; i<mbs; i++ ) {
865     n     = ii[1] - ii[0]; ii++;
866     ncols = n*bs;
867     workt = work;
868     for ( j=0; j<n; j++ ) {
869       xb = x + bs*(*idx++);
870       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
871       workt += bs;
872     }
873     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
874     v += n*bs2;
875     z += bs;
876   }
877   VecRestoreArray_Fast(xx,x);
878   VecRestoreArray_Fast(zz,z);
879   PLogFlops(2*a->nz*bs2 - a->m);
880   return 0;
881 }
882 
883 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
884 {
885   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
886   register Scalar *x,*y,*z,*v,sum;
887   int             mbs=a->mbs,i,*idx,*ii,n;
888 
889   VecGetArray_Fast(xx,x);
890   VecGetArray_Fast(yy,y);
891   VecGetArray_Fast(zz,z);
892 
893   idx   = a->j;
894   v     = a->a;
895   ii    = a->i;
896 
897   for ( i=0; i<mbs; i++ ) {
898     n    = ii[1] - ii[0]; ii++;
899     sum  = y[i];
900     while (n--) sum += *v++ * x[*idx++];
901     z[i] = sum;
902   }
903   VecRestoreArray_Fast(xx,x);
904   VecRestoreArray_Fast(yy,y);
905   VecRestoreArray_Fast(zz,z);
906   PLogFlops(2*a->nz);
907   return 0;
908 }
909 
910 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
911 {
912   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
913   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
914   register Scalar x1,x2;
915   int             mbs=a->mbs,i,*idx,*ii;
916   int             j,n;
917 
918   VecGetArray_Fast(xx,x);
919   VecGetArray_Fast(yy,y);
920   VecGetArray_Fast(zz,z);
921 
922   idx   = a->j;
923   v     = a->a;
924   ii    = a->i;
925 
926   for ( i=0; i<mbs; i++ ) {
927     n  = ii[1] - ii[0]; ii++;
928     sum1 = y[0]; sum2 = y[1];
929     for ( j=0; j<n; j++ ) {
930       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
931       sum1 += v[0]*x1 + v[2]*x2;
932       sum2 += v[1]*x1 + v[3]*x2;
933       v += 4;
934     }
935     z[0] = sum1; z[1] = sum2;
936     z += 2; y += 2;
937   }
938   VecRestoreArray_Fast(xx,x);
939   VecRestoreArray_Fast(yy,y);
940   VecRestoreArray_Fast(zz,z);
941   PLogFlops(4*a->nz);
942   return 0;
943 }
944 
945 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
946 {
947   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
948   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
949   int             mbs=a->mbs,i,*idx,*ii,j,n;
950 
951   VecGetArray_Fast(xx,x);
952   VecGetArray_Fast(yy,y);
953   VecGetArray_Fast(zz,z);
954 
955   idx   = a->j;
956   v     = a->a;
957   ii    = a->i;
958 
959   for ( i=0; i<mbs; i++ ) {
960     n  = ii[1] - ii[0]; ii++;
961     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
962     for ( j=0; j<n; j++ ) {
963       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
964       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
965       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
966       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
967       v += 9;
968     }
969     z[0] = sum1; z[1] = sum2; z[2] = sum3;
970     z += 3; y += 3;
971   }
972   VecRestoreArray_Fast(xx,x);
973   VecRestoreArray_Fast(yy,y);
974   VecRestoreArray_Fast(zz,z);
975   PLogFlops(18*a->nz);
976   return 0;
977 }
978 
979 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
980 {
981   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
982   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
983   register Scalar x1,x2,x3,x4;
984   int             mbs=a->mbs,i,*idx,*ii;
985   int             j,n;
986 
987   VecGetArray_Fast(xx,x);
988   VecGetArray_Fast(yy,y);
989   VecGetArray_Fast(zz,z);
990 
991   idx   = a->j;
992   v     = a->a;
993   ii    = a->i;
994 
995   for ( i=0; i<mbs; i++ ) {
996     n  = ii[1] - ii[0]; ii++;
997     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
998     for ( j=0; j<n; j++ ) {
999       xb = x + 4*(*idx++);
1000       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1001       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1002       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1003       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1004       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1005       v += 16;
1006     }
1007     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1008     z += 4; y += 4;
1009   }
1010   VecRestoreArray_Fast(xx,x);
1011   VecRestoreArray_Fast(yy,y);
1012   VecRestoreArray_Fast(zz,z);
1013   PLogFlops(32*a->nz);
1014   return 0;
1015 }
1016 
1017 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1018 {
1019   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1020   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1021   register Scalar x1,x2,x3,x4,x5;
1022   int             mbs=a->mbs,i,*idx,*ii,j,n;
1023 
1024   VecGetArray_Fast(xx,x);
1025   VecGetArray_Fast(yy,y);
1026   VecGetArray_Fast(zz,z);
1027 
1028   idx   = a->j;
1029   v     = a->a;
1030   ii    = a->i;
1031 
1032   for ( i=0; i<mbs; i++ ) {
1033     n  = ii[1] - ii[0]; ii++;
1034     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1035     for ( j=0; j<n; j++ ) {
1036       xb = x + 5*(*idx++);
1037       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1038       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1039       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1040       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1041       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1042       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1043       v += 25;
1044     }
1045     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1046     z += 5; y += 5;
1047   }
1048   VecRestoreArray_Fast(xx,x);
1049   VecRestoreArray_Fast(yy,y);
1050   VecRestoreArray_Fast(zz,z);
1051   PLogFlops(50*a->nz);
1052   return 0;
1053 }
1054 
1055 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1056 {
1057   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1058   register Scalar *x,*z,*v,*xb;
1059   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1060   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1061 
1062   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1063 
1064   VecGetArray_Fast(xx,x);
1065   VecGetArray_Fast(zz,z);
1066 
1067   idx   = a->j;
1068   v     = a->a;
1069   ii    = a->i;
1070 
1071 
1072   if (!a->mult_work) {
1073     k = PetscMax(a->m,a->n);
1074     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1075   }
1076   work = a->mult_work;
1077   for ( i=0; i<mbs; i++ ) {
1078     n     = ii[1] - ii[0]; ii++;
1079     ncols = n*bs;
1080     workt = work;
1081     for ( j=0; j<n; j++ ) {
1082       xb = x + bs*(*idx++);
1083       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1084       workt += bs;
1085     }
1086     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1087     v += n*bs2;
1088     z += bs;
1089   }
1090   VecRestoreArray_Fast(xx,x);
1091   VecRestoreArray_Fast(zz,z);
1092   PLogFlops(2*a->nz*bs2 );
1093   return 0;
1094 }
1095 
1096 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1097 {
1098   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1099   Scalar          *xg,*zg,*zb;
1100   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1101   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1102   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1103 
1104 
1105   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1106   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1107   PetscMemzero(z,N*sizeof(Scalar));
1108 
1109   idx   = a->j;
1110   v     = a->a;
1111   ii    = a->i;
1112 
1113   switch (bs) {
1114   case 1:
1115     for ( i=0; i<mbs; i++ ) {
1116       n  = ii[1] - ii[0]; ii++;
1117       xb = x + i; x1 = xb[0];
1118       ib = idx + ai[i];
1119       for ( j=0; j<n; j++ ) {
1120         rval    = ib[j];
1121         z[rval] += *v++ * x1;
1122       }
1123     }
1124     break;
1125   case 2:
1126     for ( i=0; i<mbs; i++ ) {
1127       n  = ii[1] - ii[0]; ii++;
1128       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1129       ib = idx + ai[i];
1130       for ( j=0; j<n; j++ ) {
1131         rval      = ib[j]*2;
1132         z[rval++] += v[0]*x1 + v[1]*x2;
1133         z[rval++] += v[2]*x1 + v[3]*x2;
1134         v += 4;
1135       }
1136     }
1137     break;
1138   case 3:
1139     for ( i=0; i<mbs; i++ ) {
1140       n  = ii[1] - ii[0]; ii++;
1141       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1142       ib = idx + ai[i];
1143       for ( j=0; j<n; j++ ) {
1144         rval      = ib[j]*3;
1145         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1146         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1147         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1148         v += 9;
1149       }
1150     }
1151     break;
1152   case 4:
1153     for ( i=0; i<mbs; i++ ) {
1154       n  = ii[1] - ii[0]; ii++;
1155       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1156       ib = idx + ai[i];
1157       for ( j=0; j<n; j++ ) {
1158         rval      = ib[j]*4;
1159         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1160         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1161         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1162         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1163         v += 16;
1164       }
1165     }
1166     break;
1167   case 5:
1168     for ( i=0; i<mbs; i++ ) {
1169       n  = ii[1] - ii[0]; ii++;
1170       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1171       x4 = xb[3];   x5 = xb[4];
1172       ib = idx + ai[i];
1173       for ( j=0; j<n; j++ ) {
1174         rval      = ib[j]*5;
1175         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1176         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1177         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1178         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1179         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1180         v += 25;
1181       }
1182     }
1183     break;
1184       /* block sizes larger then 5 by 5 are handled by BLAS */
1185     default: {
1186       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1187       if (!a->mult_work) {
1188         k = PetscMax(a->m,a->n);
1189         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1190         CHKPTRQ(a->mult_work);
1191       }
1192       work = a->mult_work;
1193       for ( i=0; i<mbs; i++ ) {
1194         n     = ii[1] - ii[0]; ii++;
1195         ncols = n*bs;
1196         PetscMemzero(work,ncols*sizeof(Scalar));
1197         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1198         v += n*bs2;
1199         x += bs;
1200         workt = work;
1201         for ( j=0; j<n; j++ ) {
1202           zb = z + bs*(*idx++);
1203           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1204           workt += bs;
1205         }
1206       }
1207     }
1208   }
1209   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1210   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1211   PLogFlops(2*a->nz*a->bs2 - a->n);
1212   return 0;
1213 }
1214 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1215 {
1216   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1217   Scalar          *xg,*zg,*zb;
1218   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1219   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1220   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1221 
1222 
1223 
1224   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1225   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1226 
1227   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1228   else PetscMemzero(z,N*sizeof(Scalar));
1229 
1230 
1231   idx   = a->j;
1232   v     = a->a;
1233   ii    = a->i;
1234 
1235   switch (bs) {
1236   case 1:
1237     for ( i=0; i<mbs; i++ ) {
1238       n  = ii[1] - ii[0]; ii++;
1239       xb = x + i; x1 = xb[0];
1240       ib = idx + ai[i];
1241       for ( j=0; j<n; j++ ) {
1242         rval    = ib[j];
1243         z[rval] += *v++ * x1;
1244       }
1245     }
1246     break;
1247   case 2:
1248     for ( i=0; i<mbs; i++ ) {
1249       n  = ii[1] - ii[0]; ii++;
1250       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1251       ib = idx + ai[i];
1252       for ( j=0; j<n; j++ ) {
1253         rval      = ib[j]*2;
1254         z[rval++] += v[0]*x1 + v[1]*x2;
1255         z[rval++] += v[2]*x1 + v[3]*x2;
1256         v += 4;
1257       }
1258     }
1259     break;
1260   case 3:
1261     for ( i=0; i<mbs; i++ ) {
1262       n  = ii[1] - ii[0]; ii++;
1263       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1264       ib = idx + ai[i];
1265       for ( j=0; j<n; j++ ) {
1266         rval      = ib[j]*3;
1267         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1268         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1269         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1270         v += 9;
1271       }
1272     }
1273     break;
1274   case 4:
1275     for ( i=0; i<mbs; i++ ) {
1276       n  = ii[1] - ii[0]; ii++;
1277       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1278       ib = idx + ai[i];
1279       for ( j=0; j<n; j++ ) {
1280         rval      = ib[j]*4;
1281         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1282         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1283         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1284         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1285         v += 16;
1286       }
1287     }
1288     break;
1289   case 5:
1290     for ( i=0; i<mbs; i++ ) {
1291       n  = ii[1] - ii[0]; ii++;
1292       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1293       x4 = xb[3];   x5 = xb[4];
1294       ib = idx + ai[i];
1295       for ( j=0; j<n; j++ ) {
1296         rval      = ib[j]*5;
1297         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1298         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1299         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1300         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1301         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1302         v += 25;
1303       }
1304     }
1305     break;
1306       /* block sizes larger then 5 by 5 are handled by BLAS */
1307     default: {
1308       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1309       if (!a->mult_work) {
1310         k = PetscMax(a->m,a->n);
1311         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1312         CHKPTRQ(a->mult_work);
1313       }
1314       work = a->mult_work;
1315       for ( i=0; i<mbs; i++ ) {
1316         n     = ii[1] - ii[0]; ii++;
1317         ncols = n*bs;
1318         PetscMemzero(work,ncols*sizeof(Scalar));
1319         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1320         v += n*bs2;
1321         x += bs;
1322         workt = work;
1323         for ( j=0; j<n; j++ ) {
1324           zb = z + bs*(*idx++);
1325           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1326           workt += bs;
1327         }
1328       }
1329     }
1330   }
1331   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1332   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1333   PLogFlops(2*a->nz*a->bs2);
1334   return 0;
1335 }
1336 
1337 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
1338 {
1339   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1340   if (nz)  *nz  = a->bs2*a->nz;
1341   if (nza) *nza = a->maxnz;
1342   if (mem) *mem = (int)A->mem;
1343   return 0;
1344 }
1345 
1346 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1347 {
1348   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1349 
1350   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
1351 
1352   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1353   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1354       (a->nz != b->nz)) {
1355     *flg = PETSC_FALSE; return 0;
1356   }
1357 
1358   /* if the a->i are the same */
1359   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1360     *flg = PETSC_FALSE; return 0;
1361   }
1362 
1363   /* if a->j are the same */
1364   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1365     *flg = PETSC_FALSE; return 0;
1366   }
1367 
1368   /* if a->a are the same */
1369   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1370     *flg = PETSC_FALSE; return 0;
1371   }
1372   *flg = PETSC_TRUE;
1373   return 0;
1374 
1375 }
1376 
1377 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1378 {
1379   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1380   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1381   Scalar      *x, zero = 0.0,*aa,*aa_j;
1382 
1383   bs   = a->bs;
1384   aa   = a->a;
1385   ai   = a->i;
1386   aj   = a->j;
1387   ambs = a->mbs;
1388   bs2  = a->bs2;
1389 
1390   VecSet(&zero,v);
1391   VecGetArray(v,&x); VecGetLocalSize(v,&n);
1392   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
1393   for ( i=0; i<ambs; i++ ) {
1394     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1395       if (aj[j] == i) {
1396         row  = i*bs;
1397         aa_j = aa+j*bs2;
1398         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1399         break;
1400       }
1401     }
1402   }
1403   return 0;
1404 }
1405 
1406 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1407 {
1408   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1409   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1410   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1411 
1412   ai  = a->i;
1413   aj  = a->j;
1414   aa  = a->a;
1415   m   = a->m;
1416   n   = a->n;
1417   bs  = a->bs;
1418   mbs = a->mbs;
1419   bs2 = a->bs2;
1420   if (ll) {
1421     VecGetArray(ll,&l); VecGetSize(ll,&lm);
1422     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
1423     for ( i=0; i<mbs; i++ ) { /* for each block row */
1424       M  = ai[i+1] - ai[i];
1425       li = l + i*bs;
1426       v  = aa + bs2*ai[i];
1427       for ( j=0; j<M; j++ ) { /* for each block */
1428         for ( k=0; k<bs2; k++ ) {
1429           (*v++) *= li[k%bs];
1430         }
1431       }
1432     }
1433   }
1434 
1435   if (rr) {
1436     VecGetArray(rr,&r); VecGetSize(rr,&rn);
1437     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
1438     for ( i=0; i<mbs; i++ ) { /* for each block row */
1439       M  = ai[i+1] - ai[i];
1440       v  = aa + bs2*ai[i];
1441       for ( j=0; j<M; j++ ) { /* for each block */
1442         ri = r + bs*aj[ai[i]+j];
1443         for ( k=0; k<bs; k++ ) {
1444           x = ri[k];
1445           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1446         }
1447       }
1448     }
1449   }
1450   return 0;
1451 }
1452 
1453 
1454 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1455 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1456 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1457 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1458 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1459 
1460 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1461 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1462 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1463 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1464 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1465 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1466 
1467 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1468 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1469 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1470 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1471 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1472 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1473 
1474 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1475 {
1476   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1477   Scalar      *v = a->a;
1478   double      sum = 0.0;
1479   int         i,nz=a->nz,bs2=a->bs2;
1480 
1481   if (type == NORM_FROBENIUS) {
1482     for (i=0; i< bs2*nz; i++ ) {
1483 #if defined(PETSC_COMPLEX)
1484       sum += real(conj(*v)*(*v)); v++;
1485 #else
1486       sum += (*v)*(*v); v++;
1487 #endif
1488     }
1489     *norm = sqrt(sum);
1490   }
1491   else {
1492     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
1493   }
1494   return 0;
1495 }
1496 
1497 /*
1498      note: This can only work for identity for row and col. It would
1499    be good to check this and otherwise generate an error.
1500 */
1501 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1502 {
1503   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1504   Mat         outA;
1505   int         ierr;
1506 
1507   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
1508 
1509   outA          = inA;
1510   inA->factor   = FACTOR_LU;
1511   a->row        = row;
1512   a->col        = col;
1513 
1514   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1515 
1516   if (!a->diag) {
1517     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1518   }
1519   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1520   return 0;
1521 }
1522 
1523 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1524 {
1525   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1526   int         one = 1, totalnz = a->bs2*a->nz;
1527   BLscal_( &totalnz, alpha, a->a, &one );
1528   PLogFlops(totalnz);
1529   return 0;
1530 }
1531 
1532 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1533 {
1534   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1535   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1536   int        *ai = a->i, *ailen = a->ilen;
1537   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1538   Scalar     *ap, *aa = a->a, zero = 0.0;
1539 
1540   for ( k=0; k<m; k++ ) { /* loop over rows */
1541     row  = im[k]; brow = row/bs;
1542     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
1543     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1544     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1545     nrow = ailen[brow];
1546     for ( l=0; l<n; l++ ) { /* loop over columns */
1547       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1548       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1549       col  = in[l] ;
1550       bcol = col/bs;
1551       cidx = col%bs;
1552       ridx = row%bs;
1553       high = nrow;
1554       low  = 0; /* assume unsorted */
1555       while (high-low > 5) {
1556         t = (low+high)/2;
1557         if (rp[t] > bcol) high = t;
1558         else             low  = t;
1559       }
1560       for ( i=low; i<high; i++ ) {
1561         if (rp[i] > bcol) break;
1562         if (rp[i] == bcol) {
1563           *v++ = ap[bs2*i+bs*cidx+ridx];
1564           goto finished;
1565         }
1566       }
1567       *v++ = zero;
1568       finished:;
1569     }
1570   }
1571   return 0;
1572 }
1573 
1574 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1575 {
1576   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1577   *bs = baij->bs;
1578   return 0;
1579 }
1580 
1581 /* idx should be of length atlease bs */
1582 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1583 {
1584   int i,row;
1585   row = idx[0];
1586   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1587 
1588   for ( i=1; i<bs; i++ ) {
1589     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1590   }
1591   *flg = PETSC_TRUE;
1592   return 0;
1593 }
1594 
1595 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1596 {
1597   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1598   IS          is_local;
1599   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1600   PetscTruth  flg;
1601   Scalar      zero = 0.0,*aa;
1602 
1603   /* Make a copy of the IS and  sort it */
1604   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1605   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1606   ierr = ISCreateSeq(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1607   ierr = ISSort(is_local); CHKERRQ(ierr);
1608   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1609 
1610   i = 0;
1611   while (i < is_n) {
1612     if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range");
1613     flg = PETSC_FALSE;
1614     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1615     if (flg) { /* There exists a block of rows to be Zerowed */
1616       baij->ilen[rows[i]/bs] = 0;
1617       i += bs;
1618     } else { /* Zero out only the requested row */
1619       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1620       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1621       for ( j=0; j<count; j++ ) {
1622         aa[0] = zero;
1623         aa+=bs;
1624       }
1625       i++;
1626     }
1627   }
1628   if (diag) {
1629     for ( j=0; j<is_n; j++ ) {
1630       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1631     }
1632   }
1633   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1634   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1635   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1636   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1637   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1638 
1639   return 0;
1640 }
1641 int MatPrintHelp_SeqBAIJ(Mat A)
1642 {
1643   static int called = 0;
1644   MPI_Comm   comm = A->comm;
1645 
1646   if (called) return 0; else called = 1;
1647   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1648   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1649   return 0;
1650 }
1651 
1652 /* -------------------------------------------------------------------*/
1653 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1654        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1655        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1656        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1657        MatSolve_SeqBAIJ_N,0,
1658        0,0,
1659        MatLUFactor_SeqBAIJ,0,
1660        0,
1661        MatTranspose_SeqBAIJ,
1662        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1663        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1664        0,MatAssemblyEnd_SeqBAIJ,
1665        0,
1666        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
1667        MatGetReordering_SeqBAIJ,
1668        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1669        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1670        MatILUFactorSymbolic_SeqBAIJ,0,
1671        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1672        MatGetSubMatrix_SeqBAIJ,0,
1673        MatConvertSameType_SeqBAIJ,0,0,
1674        MatILUFactor_SeqBAIJ,0,0,
1675        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
1676        MatGetValues_SeqBAIJ,0,
1677        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1678        0,0,0,MatGetBlockSize_SeqBAIJ};
1679 
1680 /*@C
1681    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1682    compressed row) format.  For good matrix assembly performance the
1683    user should preallocate the matrix storage by setting the parameter nz
1684    (or the array nzz).  By setting these parameters accurately, performance
1685    during matrix assembly can be increased by more than a factor of 50.
1686 
1687    Input Parameters:
1688 .  comm - MPI communicator, set to MPI_COMM_SELF
1689 .  bs - size of block
1690 .  m - number of rows
1691 .  n - number of columns
1692 .  nz - number of block nonzeros per block row (same for all rows)
1693 .  nzz - array containing the number of block nonzeros in the various block rows
1694          (possibly different for each block row) or PETSC_NULL
1695 
1696    Output Parameter:
1697 .  A - the matrix
1698 
1699    Notes:
1700    The block AIJ format is fully compatible with standard Fortran 77
1701    storage.  That is, the stored row and column indices can begin at
1702    either one (as in Fortran) or zero.  See the users' manual for details.
1703 
1704    Specify the preallocated storage with either nz or nnz (not both).
1705    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1706    allocation.  For additional details, see the users manual chapter on
1707    matrices and the file $(PETSC_DIR)/Performance.
1708 
1709 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1710 @*/
1711 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1712 {
1713   Mat         B;
1714   Mat_SeqBAIJ *b;
1715   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1716 
1717   if (mbs*bs!=m || nbs*bs!=n)
1718     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1719 
1720   *A = 0;
1721   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1722   PLogObjectCreate(B);
1723   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1724   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1725   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1726   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1727   if (!flg) {
1728     switch (bs) {
1729       case 1:
1730         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1731         B->ops.solve           = MatSolve_SeqBAIJ_1;
1732         B->ops.mult            = MatMult_SeqBAIJ_1;
1733         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
1734        break;
1735       case 2:
1736         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1737         B->ops.solve           = MatSolve_SeqBAIJ_2;
1738         B->ops.mult            = MatMult_SeqBAIJ_2;
1739         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
1740         break;
1741       case 3:
1742         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1743         B->ops.solve           = MatSolve_SeqBAIJ_3;
1744         B->ops.mult            = MatMult_SeqBAIJ_3;
1745         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
1746         break;
1747       case 4:
1748         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1749         B->ops.solve           = MatSolve_SeqBAIJ_4;
1750         B->ops.mult            = MatMult_SeqBAIJ_4;
1751         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
1752         break;
1753       case 5:
1754         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1755         B->ops.solve           = MatSolve_SeqBAIJ_5;
1756         B->ops.mult            = MatMult_SeqBAIJ_5;
1757         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
1758         break;
1759     }
1760   }
1761   B->destroy          = MatDestroy_SeqBAIJ;
1762   B->view             = MatView_SeqBAIJ;
1763   B->factor           = 0;
1764   B->lupivotthreshold = 1.0;
1765   b->row              = 0;
1766   b->col              = 0;
1767   b->reallocs         = 0;
1768 
1769   b->m       = m; B->m = m; B->M = m;
1770   b->n       = n; B->n = n; B->N = n;
1771   b->mbs     = mbs;
1772   b->nbs     = nbs;
1773   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1774   if (nnz == PETSC_NULL) {
1775     if (nz == PETSC_DEFAULT) nz = 5;
1776     else if (nz <= 0)        nz = 1;
1777     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1778     nz = nz*mbs;
1779   }
1780   else {
1781     nz = 0;
1782     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1783   }
1784 
1785   /* allocate the matrix space */
1786   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1787   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1788   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1789   b->j  = (int *) (b->a + nz*bs2);
1790   PetscMemzero(b->j,nz*sizeof(int));
1791   b->i  = b->j + nz;
1792   b->singlemalloc = 1;
1793 
1794   b->i[0] = 0;
1795   for (i=1; i<mbs+1; i++) {
1796     b->i[i] = b->i[i-1] + b->imax[i-1];
1797   }
1798 
1799   /* b->ilen will count nonzeros in each block row so far. */
1800   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1801   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1802   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1803 
1804   b->bs               = bs;
1805   b->bs2              = bs2;
1806   b->mbs              = mbs;
1807   b->nz               = 0;
1808   b->maxnz            = nz*bs2;
1809   b->sorted           = 0;
1810   b->roworiented      = 1;
1811   b->nonew            = 0;
1812   b->diag             = 0;
1813   b->solve_work       = 0;
1814   b->mult_work        = 0;
1815   b->spptr            = 0;
1816   *A = B;
1817   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1818   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1819   return 0;
1820 }
1821 
1822 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1823 {
1824   Mat         C;
1825   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1826   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1827 
1828   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1829 
1830   *B = 0;
1831   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1832   PLogObjectCreate(C);
1833   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1834   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1835   C->destroy    = MatDestroy_SeqBAIJ;
1836   C->view       = MatView_SeqBAIJ;
1837   C->factor     = A->factor;
1838   c->row        = 0;
1839   c->col        = 0;
1840   C->assembled  = PETSC_TRUE;
1841 
1842   c->m = C->m   = a->m;
1843   c->n = C->n   = a->n;
1844   C->M          = a->m;
1845   C->N          = a->n;
1846 
1847   c->bs         = a->bs;
1848   c->bs2        = a->bs2;
1849   c->mbs        = a->mbs;
1850   c->nbs        = a->nbs;
1851 
1852   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1853   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1854   for ( i=0; i<mbs; i++ ) {
1855     c->imax[i] = a->imax[i];
1856     c->ilen[i] = a->ilen[i];
1857   }
1858 
1859   /* allocate the matrix space */
1860   c->singlemalloc = 1;
1861   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1862   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1863   c->j  = (int *) (c->a + nz*bs2);
1864   c->i  = c->j + nz;
1865   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1866   if (mbs > 0) {
1867     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1868     if (cpvalues == COPY_VALUES) {
1869       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1870     }
1871   }
1872 
1873   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1874   c->sorted      = a->sorted;
1875   c->roworiented = a->roworiented;
1876   c->nonew       = a->nonew;
1877 
1878   if (a->diag) {
1879     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1880     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1881     for ( i=0; i<mbs; i++ ) {
1882       c->diag[i] = a->diag[i];
1883     }
1884   }
1885   else c->diag          = 0;
1886   c->nz                 = a->nz;
1887   c->maxnz              = a->maxnz;
1888   c->solve_work         = 0;
1889   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1890   c->mult_work          = 0;
1891   *B = C;
1892   return 0;
1893 }
1894 
1895 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1896 {
1897   Mat_SeqBAIJ  *a;
1898   Mat          B;
1899   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1900   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1901   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1902   int          *masked, nmask,tmp,bs2,ishift;
1903   Scalar       *aa;
1904   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1905 
1906   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1907   bs2  = bs*bs;
1908 
1909   MPI_Comm_size(comm,&size);
1910   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1911   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1912   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1913   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1914   M = header[1]; N = header[2]; nz = header[3];
1915 
1916   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1917 
1918   /*
1919      This code adds extra rows to make sure the number of rows is
1920     divisible by the blocksize
1921   */
1922   mbs        = M/bs;
1923   extra_rows = bs - M + bs*(mbs);
1924   if (extra_rows == bs) extra_rows = 0;
1925   else                  mbs++;
1926   if (extra_rows) {
1927     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
1928   }
1929 
1930   /* read in row lengths */
1931   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1932   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1933   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1934 
1935   /* read in column indices */
1936   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1937   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1938   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1939 
1940   /* loop over row lengths determining block row lengths */
1941   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1942   PetscMemzero(browlengths,mbs*sizeof(int));
1943   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1944   PetscMemzero(mask,mbs*sizeof(int));
1945   masked = mask + mbs;
1946   rowcount = 0; nzcount = 0;
1947   for ( i=0; i<mbs; i++ ) {
1948     nmask = 0;
1949     for ( j=0; j<bs; j++ ) {
1950       kmax = rowlengths[rowcount];
1951       for ( k=0; k<kmax; k++ ) {
1952         tmp = jj[nzcount++]/bs;
1953         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1954       }
1955       rowcount++;
1956     }
1957     browlengths[i] += nmask;
1958     /* zero out the mask elements we set */
1959     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1960   }
1961 
1962   /* create our matrix */
1963   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1964          CHKERRQ(ierr);
1965   B = *A;
1966   a = (Mat_SeqBAIJ *) B->data;
1967 
1968   /* set matrix "i" values */
1969   a->i[0] = 0;
1970   for ( i=1; i<= mbs; i++ ) {
1971     a->i[i]      = a->i[i-1] + browlengths[i-1];
1972     a->ilen[i-1] = browlengths[i-1];
1973   }
1974   a->nz         = 0;
1975   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1976 
1977   /* read in nonzero values */
1978   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1979   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
1980   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1981 
1982   /* set "a" and "j" values into matrix */
1983   nzcount = 0; jcount = 0;
1984   for ( i=0; i<mbs; i++ ) {
1985     nzcountb = nzcount;
1986     nmask    = 0;
1987     for ( j=0; j<bs; j++ ) {
1988       kmax = rowlengths[i*bs+j];
1989       for ( k=0; k<kmax; k++ ) {
1990         tmp = jj[nzcount++]/bs;
1991 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1992       }
1993       rowcount++;
1994     }
1995     /* sort the masked values */
1996     PetscSortInt(nmask,masked);
1997 
1998     /* set "j" values into matrix */
1999     maskcount = 1;
2000     for ( j=0; j<nmask; j++ ) {
2001       a->j[jcount++]  = masked[j];
2002       mask[masked[j]] = maskcount++;
2003     }
2004     /* set "a" values into matrix */
2005     ishift = bs2*a->i[i];
2006     for ( j=0; j<bs; j++ ) {
2007       kmax = rowlengths[i*bs+j];
2008       for ( k=0; k<kmax; k++ ) {
2009         tmp    = jj[nzcountb]/bs ;
2010         block  = mask[tmp] - 1;
2011         point  = jj[nzcountb] - bs*tmp;
2012         idx    = ishift + bs2*block + j + bs*point;
2013         a->a[idx] = aa[nzcountb++];
2014       }
2015     }
2016     /* zero out the mask elements we set */
2017     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2018   }
2019   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
2020 
2021   PetscFree(rowlengths);
2022   PetscFree(browlengths);
2023   PetscFree(aa);
2024   PetscFree(jj);
2025   PetscFree(mask);
2026 
2027   B->assembled = PETSC_TRUE;
2028 
2029   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2030   if (flg) {
2031     Viewer tviewer;
2032     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2033     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
2034     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2035     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2036   }
2037   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2038   if (flg) {
2039     Viewer tviewer;
2040     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2041     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
2042     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2043     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2044   }
2045   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2046   if (flg) {
2047     Viewer tviewer;
2048     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2049     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2050     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2051   }
2052   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2053   if (flg) {
2054     Viewer tviewer;
2055     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2056     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
2057     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2058     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2059   }
2060   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2061   if (flg) {
2062     Viewer tviewer;
2063     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
2064     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
2065     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
2066     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2067   }
2068   return 0;
2069 }
2070 
2071 
2072 
2073