xref: /petsc/src/mat/impls/baij/seq/baij.c (revision bbdbd9a052d0f3015c70f8781ea47bd69446daae)
1 #ifndef lint
2 static char vcid[] = "$Id: baij.c,v 1.69 1996/11/07 15:09:41 bsmith Exp bsmith $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "src/mat/impls/baij/seq/baij.h"
10 #include "src/vec/vecimpl.h"
11 #include "src/inline/spops.h"
12 #include "petsc.h"
13 
14 
15 /*
16      Adds diagonal pointers to sparse matrix structure.
17 */
18 
19 int MatMarkDiag_SeqBAIJ(Mat A)
20 {
21   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
22   int         i,j, *diag, m = a->mbs;
23 
24   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
25   PLogObjectMemory(A,(m+1)*sizeof(int));
26   for ( i=0; i<m; i++ ) {
27     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
28       if (a->j[j] == i) {
29         diag[i] = j;
30         break;
31       }
32     }
33   }
34   a->diag = diag;
35   return 0;
36 }
37 
38 #include "draw.h"
39 #include "pinclude/pviewer.h"
40 #include "sys.h"
41 
42 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
43 
44 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
45                             PetscTruth *done)
46 {
47   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
48   int         ierr, n = a->mbs,i;
49 
50   *nn = n;
51   if (!ia) return 0;
52   if (symmetric) {
53     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
54   } else if (oshift == 1) {
55     /* temporarily add 1 to i and j indices */
56     int nz = a->i[n] + 1;
57     for ( i=0; i<nz; i++ ) a->j[i]++;
58     for ( i=0; i<n+1; i++ ) a->i[i]++;
59     *ia = a->i; *ja = a->j;
60   } else {
61     *ia = a->i; *ja = a->j;
62   }
63 
64   return 0;
65 }
66 
67 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
68                                 PetscTruth *done)
69 {
70   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
71   int         i,n = a->mbs;
72 
73   if (!ia) return 0;
74   if (symmetric) {
75     PetscFree(*ia);
76     PetscFree(*ja);
77   } else if (oshift == 1) {
78     int nz = a->i[n];
79     for ( i=0; i<nz; i++ ) a->j[i]--;
80     for ( i=0; i<n+1; i++ ) a->i[i]--;
81   }
82   return 0;
83 }
84 
85 
86 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
87 {
88   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
89   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
90   Scalar      *aa;
91 
92   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
93   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
94   col_lens[0] = MAT_COOKIE;
95 
96   col_lens[1] = a->m;
97   col_lens[2] = a->n;
98   col_lens[3] = a->nz*bs2;
99 
100   /* store lengths of each row and write (including header) to file */
101   count = 0;
102   for ( i=0; i<a->mbs; i++ ) {
103     for ( j=0; j<bs; j++ ) {
104       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
105     }
106   }
107   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
108   PetscFree(col_lens);
109 
110   /* store column indices (zero start index) */
111   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
112   count = 0;
113   for ( i=0; i<a->mbs; i++ ) {
114     for ( j=0; j<bs; j++ ) {
115       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
116         for ( l=0; l<bs; l++ ) {
117           jj[count++] = bs*a->j[k] + l;
118         }
119       }
120     }
121   }
122   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
123   PetscFree(jj);
124 
125   /* store nonzero values */
126   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
127   count = 0;
128   for ( i=0; i<a->mbs; i++ ) {
129     for ( j=0; j<bs; j++ ) {
130       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
131         for ( l=0; l<bs; l++ ) {
132           aa[count++] = a->a[bs2*k + l*bs + j];
133         }
134       }
135     }
136   }
137   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
138   PetscFree(aa);
139   return 0;
140 }
141 
142 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
143 {
144   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
145   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
146   FILE        *fd;
147   char        *outputname;
148 
149   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
150   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
151   ierr = ViewerGetFormat(viewer,&format);
152   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
153     fprintf(fd,"  block size is %d\n",bs);
154   }
155   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
156     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
157   }
158   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
159     for ( i=0; i<a->mbs; i++ ) {
160       for ( j=0; j<bs; j++ ) {
161         fprintf(fd,"row %d:",i*bs+j);
162         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
163           for ( l=0; l<bs; l++ ) {
164 #if defined(PETSC_COMPLEX)
165           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
166             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
167               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
168           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
169             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
170 #else
171           if (a->a[bs2*k + l*bs + j] != 0.0)
172             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
173 #endif
174           }
175         }
176         fprintf(fd,"\n");
177       }
178     }
179   }
180   else {
181     for ( i=0; i<a->mbs; i++ ) {
182       for ( j=0; j<bs; j++ ) {
183         fprintf(fd,"row %d:",i*bs+j);
184         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
185           for ( l=0; l<bs; l++ ) {
186 #if defined(PETSC_COMPLEX)
187           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
188             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
189               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
190           }
191           else {
192             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
193           }
194 #else
195             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
196 #endif
197           }
198         }
199         fprintf(fd,"\n");
200       }
201     }
202   }
203   fflush(fd);
204   return 0;
205 }
206 
207 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
208 {
209   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
210   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
211   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
212   Scalar       *aa;
213   Draw         draw;
214   DrawButton   button;
215   PetscTruth   isnull;
216 
217   ViewerDrawGetDraw(viewer,&draw);
218   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
219 
220   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
221   xr += w;    yr += h;  xl = -w;     yl = -h;
222   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
223   /* loop over matrix elements drawing boxes */
224   color = DRAW_BLUE;
225   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
226     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
227       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
228       x_l = a->j[j]*bs; x_r = x_l + 1.0;
229       aa = a->a + j*bs2;
230       for ( k=0; k<bs; k++ ) {
231         for ( l=0; l<bs; l++ ) {
232           if (PetscReal(*aa++) >=  0.) continue;
233           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
234         }
235       }
236     }
237   }
238   color = DRAW_CYAN;
239   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
240     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
241       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
242       x_l = a->j[j]*bs; x_r = x_l + 1.0;
243       aa = a->a + j*bs2;
244       for ( k=0; k<bs; k++ ) {
245         for ( l=0; l<bs; l++ ) {
246           if (PetscReal(*aa++) != 0.) continue;
247           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
248         }
249       }
250     }
251   }
252 
253   color = DRAW_RED;
254   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
255     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
256       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
257       x_l = a->j[j]*bs; x_r = x_l + 1.0;
258       aa = a->a + j*bs2;
259       for ( k=0; k<bs; k++ ) {
260         for ( l=0; l<bs; l++ ) {
261           if (PetscReal(*aa++) <= 0.) continue;
262           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
263         }
264       }
265     }
266   }
267 
268   DrawFlush(draw);
269   DrawGetPause(draw,&pause);
270   if (pause >= 0) { PetscSleep(pause); return 0;}
271 
272   /* allow the matrix to zoom or shrink */
273   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
274   while (button != BUTTON_RIGHT) {
275     DrawClear(draw);
276     if (button == BUTTON_LEFT) scale = .5;
277     else if (button == BUTTON_CENTER) scale = 2.;
278     xl = scale*(xl + w - xc) + xc - w*scale;
279     xr = scale*(xr - w - xc) + xc + w*scale;
280     yl = scale*(yl + h - yc) + yc - h*scale;
281     yr = scale*(yr - h - yc) + yc + h*scale;
282     w *= scale; h *= scale;
283     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
284     color = DRAW_BLUE;
285     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
286       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
287         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
288         x_l = a->j[j]*bs; x_r = x_l + 1.0;
289         aa = a->a + j*bs2;
290         for ( k=0; k<bs; k++ ) {
291           for ( l=0; l<bs; l++ ) {
292             if (PetscReal(*aa++) >=  0.) continue;
293             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
294           }
295         }
296       }
297     }
298     color = DRAW_CYAN;
299     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
300       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
301         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
302         x_l = a->j[j]*bs; x_r = x_l + 1.0;
303         aa = a->a + j*bs2;
304         for ( k=0; k<bs; k++ ) {
305           for ( l=0; l<bs; l++ ) {
306           if (PetscReal(*aa++) != 0.) continue;
307           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
308           }
309         }
310       }
311     }
312 
313     color = DRAW_RED;
314     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
315       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
316         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
317         x_l = a->j[j]*bs; x_r = x_l + 1.0;
318         aa = a->a + j*bs2;
319         for ( k=0; k<bs; k++ ) {
320           for ( l=0; l<bs; l++ ) {
321             if (PetscReal(*aa++) <= 0.) continue;
322             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
323           }
324         }
325       }
326     }
327     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
328   }
329   return 0;
330 }
331 
332 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
333 {
334   Mat         A = (Mat) obj;
335   ViewerType  vtype;
336   int         ierr;
337 
338   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
339   if (vtype == MATLAB_VIEWER) {
340     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
341   }
342   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
343     return MatView_SeqBAIJ_ASCII(A,viewer);
344   }
345   else if (vtype == BINARY_FILE_VIEWER) {
346     return MatView_SeqBAIJ_Binary(A,viewer);
347   }
348   else if (vtype == DRAW_VIEWER) {
349     return MatView_SeqBAIJ_Draw(A,viewer);
350   }
351   return 0;
352 }
353 
354 #define CHUNKSIZE  10
355 
356 /* This version has row oriented v  */
357 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
358 {
359   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
360   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
361   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
362   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
363   int          ridx,cidx,bs2=a->bs2;
364   Scalar      *ap,value,*aa=a->a,*bap;
365 
366   for ( k=0; k<m; k++ ) { /* loop over added rows */
367     row  = im[k]; brow = row/bs;
368 #if defined(PETSC_BOPT_g)
369     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
370     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
371 #endif
372     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
373     rmax = imax[brow]; nrow = ailen[brow];
374     low = 0;
375     for ( l=0; l<n; l++ ) { /* loop over added columns */
376 #if defined(PETSC_BOPT_g)
377       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
378       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
379 #endif
380       col = in[l]; bcol = col/bs;
381       ridx = row % bs; cidx = col % bs;
382       if (roworiented) {
383         value = *v++;
384       }
385       else {
386         value = v[k + l*m];
387       }
388       if (!sorted) low = 0; high = nrow;
389       while (high-low > 5) {
390         t = (low+high)/2;
391         if (rp[t] > bcol) high = t;
392         else              low  = t;
393       }
394       for ( i=low; i<high; i++ ) {
395         if (rp[i] > bcol) break;
396         if (rp[i] == bcol) {
397           bap  = ap +  bs2*i + bs*cidx + ridx;
398           if (is == ADD_VALUES) *bap += value;
399           else                  *bap  = value;
400           goto noinsert;
401         }
402       }
403       if (nonew) goto noinsert;
404       if (nrow >= rmax) {
405         /* there is no extra room in row, therefore enlarge */
406         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
407         Scalar *new_a;
408 
409         /* malloc new storage space */
410         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
411         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
412         new_j   = (int *) (new_a + bs2*new_nz);
413         new_i   = new_j + new_nz;
414 
415         /* copy over old data into new slots */
416         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
417         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
418         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
419         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
420         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
421                                                            len*sizeof(int));
422         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
423         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
424         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
425                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
426         /* free up old matrix storage */
427         PetscFree(a->a);
428         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
429         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
430         a->singlemalloc = 1;
431 
432         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
433         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
434         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
435         a->maxnz += bs2*CHUNKSIZE;
436         a->reallocs++;
437         a->nz++;
438       }
439       N = nrow++ - 1;
440       /* shift up all the later entries in this row */
441       for ( ii=N; ii>=i; ii-- ) {
442         rp[ii+1] = rp[ii];
443         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
444       }
445       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
446       rp[i]                      = bcol;
447       ap[bs2*i + bs*cidx + ridx] = value;
448       noinsert:;
449       low = i;
450     }
451     ailen[brow] = nrow;
452   }
453   return 0;
454 }
455 
456 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
457 {
458   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
459   *m = a->m; *n = a->n;
460   return 0;
461 }
462 
463 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
464 {
465   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
466   *m = 0; *n = a->m;
467   return 0;
468 }
469 
470 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
471 {
472   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
473   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
474   Scalar      *aa,*v_i,*aa_i;
475 
476   bs  = a->bs;
477   ai  = a->i;
478   aj  = a->j;
479   aa  = a->a;
480   bs2 = a->bs2;
481 
482   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
483 
484   bn  = row/bs;   /* Block number */
485   bp  = row % bs; /* Block Position */
486   M   = ai[bn+1] - ai[bn];
487   *nz = bs*M;
488 
489   if (v) {
490     *v = 0;
491     if (*nz) {
492       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
493       for ( i=0; i<M; i++ ) { /* for each block in the block row */
494         v_i  = *v + i*bs;
495         aa_i = aa + bs2*(ai[bn] + i);
496         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
497       }
498     }
499   }
500 
501   if (idx) {
502     *idx = 0;
503     if (*nz) {
504       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
505       for ( i=0; i<M; i++ ) { /* for each block in the block row */
506         idx_i = *idx + i*bs;
507         itmp  = bs*aj[ai[bn] + i];
508         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
509       }
510     }
511   }
512   return 0;
513 }
514 
515 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
516 {
517   if (idx) {if (*idx) PetscFree(*idx);}
518   if (v)   {if (*v)   PetscFree(*v);}
519   return 0;
520 }
521 
522 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
523 {
524   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
525   Mat         C;
526   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
527   int         *rows,*cols,bs2=a->bs2;
528   Scalar      *array=a->a;
529 
530   if (B==PETSC_NULL && mbs!=nbs)
531     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
532   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
533   PetscMemzero(col,(1+nbs)*sizeof(int));
534 
535   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
536   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
537   PetscFree(col);
538   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
539   cols = rows + bs;
540   for ( i=0; i<mbs; i++ ) {
541     cols[0] = i*bs;
542     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
543     len = ai[i+1] - ai[i];
544     for ( j=0; j<len; j++ ) {
545       rows[0] = (*aj++)*bs;
546       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
547       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
548       array += bs2;
549     }
550   }
551   PetscFree(rows);
552 
553   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
554   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
555 
556   if (B != PETSC_NULL) {
557     *B = C;
558   } else {
559     /* This isn't really an in-place transpose */
560     PetscFree(a->a);
561     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
562     if (a->diag) PetscFree(a->diag);
563     if (a->ilen) PetscFree(a->ilen);
564     if (a->imax) PetscFree(a->imax);
565     if (a->solve_work) PetscFree(a->solve_work);
566     PetscFree(a);
567     PetscMemcpy(A,C,sizeof(struct _Mat));
568     PetscHeaderDestroy(C);
569   }
570   return 0;
571 }
572 
573 
574 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
575 {
576   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
577   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
578   int        m = a->m,*ip, N, *ailen = a->ilen;
579   int        mbs = a->mbs, bs2 = a->bs2;
580   Scalar     *aa = a->a, *ap;
581 
582   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
583 
584   for ( i=1; i<mbs; i++ ) {
585     /* move each row back by the amount of empty slots (fshift) before it*/
586     fshift += imax[i-1] - ailen[i-1];
587     if (fshift) {
588       ip = aj + ai[i]; ap = aa + bs2*ai[i];
589       N = ailen[i];
590       for ( j=0; j<N; j++ ) {
591         ip[j-fshift] = ip[j];
592         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
593       }
594     }
595     ai[i] = ai[i-1] + ailen[i-1];
596   }
597   if (mbs) {
598     fshift += imax[mbs-1] - ailen[mbs-1];
599     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
600   }
601   /* reset ilen and imax for each row */
602   for ( i=0; i<mbs; i++ ) {
603     ailen[i] = imax[i] = ai[i+1] - ai[i];
604   }
605   a->nz = ai[mbs];
606 
607   /* diagonals may have moved, so kill the diagonal pointers */
608   if (fshift && a->diag) {
609     PetscFree(a->diag);
610     PLogObjectMemory(A,-(m+1)*sizeof(int));
611     a->diag = 0;
612   }
613   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneed storage space %d used %d, rows %d, block size %d\n",
614            fshift*bs2,a->nz*bs2,m,a->bs);
615   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n",
616            a->reallocs);
617   A->info.nz_unneeded  = (double)fshift*bs2;
618 
619   return 0;
620 }
621 
622 static int MatZeroEntries_SeqBAIJ(Mat A)
623 {
624   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
625   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
626   return 0;
627 }
628 
629 int MatDestroy_SeqBAIJ(PetscObject obj)
630 {
631   Mat         A  = (Mat) obj;
632   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
633 
634 #if defined(PETSC_LOG)
635   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
636 #endif
637   PetscFree(a->a);
638   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
639   if (a->diag) PetscFree(a->diag);
640   if (a->ilen) PetscFree(a->ilen);
641   if (a->imax) PetscFree(a->imax);
642   if (a->solve_work) PetscFree(a->solve_work);
643   if (a->mult_work) PetscFree(a->mult_work);
644   PetscFree(a);
645   PLogObjectDestroy(A);
646   PetscHeaderDestroy(A);
647   return 0;
648 }
649 
650 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
651 {
652   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
653   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
654   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
655   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
656   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
657   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
658   else if (op == MAT_ROWS_SORTED ||
659            op == MAT_SYMMETRIC ||
660            op == MAT_STRUCTURALLY_SYMMETRIC ||
661            op == MAT_YES_NEW_DIAGONALS)
662     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
663   else if (op == MAT_NO_NEW_DIAGONALS)
664     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");}
665   else
666     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
667   return 0;
668 }
669 
670 
671 /* -------------------------------------------------------*/
672 /* Should check that shapes of vectors and matrices match */
673 /* -------------------------------------------------------*/
674 #include "pinclude/plapack.h"
675 
676 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
677 {
678   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
679   register Scalar *x,*z,*v,sum;
680   int             mbs=a->mbs,i,*idx,*ii,n;
681 
682   VecGetArray_Fast(xx,x);
683   VecGetArray_Fast(zz,z);
684 
685   idx   = a->j;
686   v     = a->a;
687   ii    = a->i;
688 
689   for ( i=0; i<mbs; i++ ) {
690     n    = ii[1] - ii[0]; ii++;
691     sum  = 0.0;
692     while (n--) sum += *v++ * x[*idx++];
693     z[i] = sum;
694   }
695   VecRestoreArray_Fast(xx,x);
696   VecRestoreArray_Fast(zz,z);
697   PLogFlops(2*a->nz - a->m);
698   return 0;
699 }
700 
701 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
702 {
703   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
704   register Scalar *x,*z,*v,*xb,sum1,sum2;
705   register Scalar x1,x2;
706   int             mbs=a->mbs,i,*idx,*ii;
707   int             j,n;
708 
709   VecGetArray_Fast(xx,x);
710   VecGetArray_Fast(zz,z);
711 
712   idx   = a->j;
713   v     = a->a;
714   ii    = a->i;
715 
716   for ( i=0; i<mbs; i++ ) {
717     n  = ii[1] - ii[0]; ii++;
718     sum1 = 0.0; sum2 = 0.0;
719     for ( j=0; j<n; j++ ) {
720       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
721       sum1 += v[0]*x1 + v[2]*x2;
722       sum2 += v[1]*x1 + v[3]*x2;
723       v += 4;
724     }
725     z[0] = sum1; z[1] = sum2;
726     z += 2;
727   }
728   VecRestoreArray_Fast(xx,x);
729   VecRestoreArray_Fast(zz,z);
730   PLogFlops(4*a->nz - a->m);
731   return 0;
732 }
733 
734 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
735 {
736   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
737   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
738   int             mbs=a->mbs,i,*idx,*ii,j,n;
739 
740   VecGetArray_Fast(xx,x);
741   VecGetArray_Fast(zz,z);
742 
743   idx   = a->j;
744   v     = a->a;
745   ii    = a->i;
746 
747   for ( i=0; i<mbs; i++ ) {
748     n  = ii[1] - ii[0]; ii++;
749     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
750     for ( j=0; j<n; j++ ) {
751       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
752       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
753       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
754       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
755       v += 9;
756     }
757     z[0] = sum1; z[1] = sum2; z[2] = sum3;
758     z += 3;
759   }
760   VecRestoreArray_Fast(xx,x);
761   VecRestoreArray_Fast(zz,z);
762   PLogFlops(18*a->nz - a->m);
763   return 0;
764 }
765 
766 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
767 {
768   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
769   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
770   register Scalar x1,x2,x3,x4;
771   int             mbs=a->mbs,i,*idx,*ii;
772   int             j,n;
773 
774   VecGetArray_Fast(xx,x);
775   VecGetArray_Fast(zz,z);
776 
777   idx   = a->j;
778   v     = a->a;
779   ii    = a->i;
780 
781   for ( i=0; i<mbs; i++ ) {
782     n  = ii[1] - ii[0]; ii++;
783     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
784     for ( j=0; j<n; j++ ) {
785       xb = x + 4*(*idx++);
786       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
787       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
788       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
789       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
790       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
791       v += 16;
792     }
793     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
794     z += 4;
795   }
796   VecRestoreArray_Fast(xx,x);
797   VecRestoreArray_Fast(zz,z);
798   PLogFlops(32*a->nz - a->m);
799   return 0;
800 }
801 
802 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
803 {
804   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
805   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
806   register Scalar x1,x2,x3,x4,x5;
807   int             mbs=a->mbs,i,*idx,*ii,j,n;
808 
809   VecGetArray_Fast(xx,x);
810   VecGetArray_Fast(zz,z);
811 
812   idx   = a->j;
813   v     = a->a;
814   ii    = a->i;
815 
816   for ( i=0; i<mbs; i++ ) {
817     n  = ii[1] - ii[0]; ii++;
818     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
819     for ( j=0; j<n; j++ ) {
820       xb = x + 5*(*idx++);
821       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
822       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
823       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
824       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
825       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
826       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
827       v += 25;
828     }
829     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
830     z += 5;
831   }
832   VecRestoreArray_Fast(xx,x);
833   VecRestoreArray_Fast(zz,z);
834   PLogFlops(50*a->nz - a->m);
835   return 0;
836 }
837 
838 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
839 {
840   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
841   register Scalar *x,*z,*v,*xb;
842   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
843   int             _One = 1,ncols,k;
844   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
845 
846   VecGetArray_Fast(xx,x);
847   VecGetArray_Fast(zz,z);
848 
849   idx   = a->j;
850   v     = a->a;
851   ii    = a->i;
852 
853 
854   if (!a->mult_work) {
855     k = PetscMax(a->m,a->n);
856     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
857   }
858   work = a->mult_work;
859   for ( i=0; i<mbs; i++ ) {
860     n     = ii[1] - ii[0]; ii++;
861     ncols = n*bs;
862     workt = work;
863     for ( j=0; j<n; j++ ) {
864       xb = x + bs*(*idx++);
865       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
866       workt += bs;
867     }
868     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
869     v += n*bs2;
870     z += bs;
871   }
872   VecRestoreArray_Fast(xx,x);
873   VecRestoreArray_Fast(zz,z);
874   PLogFlops(2*a->nz*bs2 - a->m);
875   return 0;
876 }
877 
878 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
879 {
880   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
881   register Scalar *x,*y,*z,*v,sum;
882   int             mbs=a->mbs,i,*idx,*ii,n;
883 
884   VecGetArray_Fast(xx,x);
885   VecGetArray_Fast(yy,y);
886   VecGetArray_Fast(zz,z);
887 
888   idx   = a->j;
889   v     = a->a;
890   ii    = a->i;
891 
892   for ( i=0; i<mbs; i++ ) {
893     n    = ii[1] - ii[0]; ii++;
894     sum  = y[i];
895     while (n--) sum += *v++ * x[*idx++];
896     z[i] = sum;
897   }
898   VecRestoreArray_Fast(xx,x);
899   VecRestoreArray_Fast(yy,y);
900   VecRestoreArray_Fast(zz,z);
901   PLogFlops(2*a->nz);
902   return 0;
903 }
904 
905 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
906 {
907   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
908   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
909   register Scalar x1,x2;
910   int             mbs=a->mbs,i,*idx,*ii;
911   int             j,n;
912 
913   VecGetArray_Fast(xx,x);
914   VecGetArray_Fast(yy,y);
915   VecGetArray_Fast(zz,z);
916 
917   idx   = a->j;
918   v     = a->a;
919   ii    = a->i;
920 
921   for ( i=0; i<mbs; i++ ) {
922     n  = ii[1] - ii[0]; ii++;
923     sum1 = y[0]; sum2 = y[1];
924     for ( j=0; j<n; j++ ) {
925       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
926       sum1 += v[0]*x1 + v[2]*x2;
927       sum2 += v[1]*x1 + v[3]*x2;
928       v += 4;
929     }
930     z[0] = sum1; z[1] = sum2;
931     z += 2; y += 2;
932   }
933   VecRestoreArray_Fast(xx,x);
934   VecRestoreArray_Fast(yy,y);
935   VecRestoreArray_Fast(zz,z);
936   PLogFlops(4*a->nz);
937   return 0;
938 }
939 
940 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
941 {
942   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
943   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
944   int             mbs=a->mbs,i,*idx,*ii,j,n;
945 
946   VecGetArray_Fast(xx,x);
947   VecGetArray_Fast(yy,y);
948   VecGetArray_Fast(zz,z);
949 
950   idx   = a->j;
951   v     = a->a;
952   ii    = a->i;
953 
954   for ( i=0; i<mbs; i++ ) {
955     n  = ii[1] - ii[0]; ii++;
956     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
957     for ( j=0; j<n; j++ ) {
958       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
959       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
960       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
961       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
962       v += 9;
963     }
964     z[0] = sum1; z[1] = sum2; z[2] = sum3;
965     z += 3; y += 3;
966   }
967   VecRestoreArray_Fast(xx,x);
968   VecRestoreArray_Fast(yy,y);
969   VecRestoreArray_Fast(zz,z);
970   PLogFlops(18*a->nz);
971   return 0;
972 }
973 
974 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
975 {
976   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
977   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
978   register Scalar x1,x2,x3,x4;
979   int             mbs=a->mbs,i,*idx,*ii;
980   int             j,n;
981 
982   VecGetArray_Fast(xx,x);
983   VecGetArray_Fast(yy,y);
984   VecGetArray_Fast(zz,z);
985 
986   idx   = a->j;
987   v     = a->a;
988   ii    = a->i;
989 
990   for ( i=0; i<mbs; i++ ) {
991     n  = ii[1] - ii[0]; ii++;
992     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
993     for ( j=0; j<n; j++ ) {
994       xb = x + 4*(*idx++);
995       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
996       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
997       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
998       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
999       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1000       v += 16;
1001     }
1002     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1003     z += 4; y += 4;
1004   }
1005   VecRestoreArray_Fast(xx,x);
1006   VecRestoreArray_Fast(yy,y);
1007   VecRestoreArray_Fast(zz,z);
1008   PLogFlops(32*a->nz);
1009   return 0;
1010 }
1011 
1012 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1013 {
1014   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1015   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1016   register Scalar x1,x2,x3,x4,x5;
1017   int             mbs=a->mbs,i,*idx,*ii,j,n;
1018 
1019   VecGetArray_Fast(xx,x);
1020   VecGetArray_Fast(yy,y);
1021   VecGetArray_Fast(zz,z);
1022 
1023   idx   = a->j;
1024   v     = a->a;
1025   ii    = a->i;
1026 
1027   for ( i=0; i<mbs; i++ ) {
1028     n  = ii[1] - ii[0]; ii++;
1029     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1030     for ( j=0; j<n; j++ ) {
1031       xb = x + 5*(*idx++);
1032       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1033       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1034       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1035       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1036       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1037       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1038       v += 25;
1039     }
1040     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1041     z += 5; y += 5;
1042   }
1043   VecRestoreArray_Fast(xx,x);
1044   VecRestoreArray_Fast(yy,y);
1045   VecRestoreArray_Fast(zz,z);
1046   PLogFlops(50*a->nz);
1047   return 0;
1048 }
1049 
1050 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1051 {
1052   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1053   register Scalar *x,*z,*v,*xb;
1054   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1055   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1056 
1057   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1058 
1059   VecGetArray_Fast(xx,x);
1060   VecGetArray_Fast(zz,z);
1061 
1062   idx   = a->j;
1063   v     = a->a;
1064   ii    = a->i;
1065 
1066 
1067   if (!a->mult_work) {
1068     k = PetscMax(a->m,a->n);
1069     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1070   }
1071   work = a->mult_work;
1072   for ( i=0; i<mbs; i++ ) {
1073     n     = ii[1] - ii[0]; ii++;
1074     ncols = n*bs;
1075     workt = work;
1076     for ( j=0; j<n; j++ ) {
1077       xb = x + bs*(*idx++);
1078       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1079       workt += bs;
1080     }
1081     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1082     v += n*bs2;
1083     z += bs;
1084   }
1085   VecRestoreArray_Fast(xx,x);
1086   VecRestoreArray_Fast(zz,z);
1087   PLogFlops(2*a->nz*bs2 );
1088   return 0;
1089 }
1090 
1091 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1092 {
1093   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1094   Scalar          *xg,*zg,*zb;
1095   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1096   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1097   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1098 
1099 
1100   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1101   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1102   PetscMemzero(z,N*sizeof(Scalar));
1103 
1104   idx   = a->j;
1105   v     = a->a;
1106   ii    = a->i;
1107 
1108   switch (bs) {
1109   case 1:
1110     for ( i=0; i<mbs; i++ ) {
1111       n  = ii[1] - ii[0]; ii++;
1112       xb = x + i; x1 = xb[0];
1113       ib = idx + ai[i];
1114       for ( j=0; j<n; j++ ) {
1115         rval    = ib[j];
1116         z[rval] += *v++ * x1;
1117       }
1118     }
1119     break;
1120   case 2:
1121     for ( i=0; i<mbs; i++ ) {
1122       n  = ii[1] - ii[0]; ii++;
1123       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1124       ib = idx + ai[i];
1125       for ( j=0; j<n; j++ ) {
1126         rval      = ib[j]*2;
1127         z[rval++] += v[0]*x1 + v[1]*x2;
1128         z[rval++] += v[2]*x1 + v[3]*x2;
1129         v += 4;
1130       }
1131     }
1132     break;
1133   case 3:
1134     for ( i=0; i<mbs; i++ ) {
1135       n  = ii[1] - ii[0]; ii++;
1136       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1137       ib = idx + ai[i];
1138       for ( j=0; j<n; j++ ) {
1139         rval      = ib[j]*3;
1140         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1141         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1142         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1143         v += 9;
1144       }
1145     }
1146     break;
1147   case 4:
1148     for ( i=0; i<mbs; i++ ) {
1149       n  = ii[1] - ii[0]; ii++;
1150       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1151       ib = idx + ai[i];
1152       for ( j=0; j<n; j++ ) {
1153         rval      = ib[j]*4;
1154         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1155         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1156         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1157         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1158         v += 16;
1159       }
1160     }
1161     break;
1162   case 5:
1163     for ( i=0; i<mbs; i++ ) {
1164       n  = ii[1] - ii[0]; ii++;
1165       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1166       x4 = xb[3];   x5 = xb[4];
1167       ib = idx + ai[i];
1168       for ( j=0; j<n; j++ ) {
1169         rval      = ib[j]*5;
1170         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1171         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1172         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1173         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1174         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1175         v += 25;
1176       }
1177     }
1178     break;
1179       /* block sizes larger then 5 by 5 are handled by BLAS */
1180     default: {
1181       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1182       if (!a->mult_work) {
1183         k = PetscMax(a->m,a->n);
1184         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1185         CHKPTRQ(a->mult_work);
1186       }
1187       work = a->mult_work;
1188       for ( i=0; i<mbs; i++ ) {
1189         n     = ii[1] - ii[0]; ii++;
1190         ncols = n*bs;
1191         PetscMemzero(work,ncols*sizeof(Scalar));
1192         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1193         v += n*bs2;
1194         x += bs;
1195         workt = work;
1196         for ( j=0; j<n; j++ ) {
1197           zb = z + bs*(*idx++);
1198           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1199           workt += bs;
1200         }
1201       }
1202     }
1203   }
1204   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1205   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1206   PLogFlops(2*a->nz*a->bs2 - a->n);
1207   return 0;
1208 }
1209 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1210 {
1211   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1212   Scalar          *xg,*zg,*zb;
1213   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1214   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1215   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1216 
1217 
1218 
1219   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1220   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1221 
1222   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1223   else PetscMemzero(z,N*sizeof(Scalar));
1224 
1225 
1226   idx   = a->j;
1227   v     = a->a;
1228   ii    = a->i;
1229 
1230   switch (bs) {
1231   case 1:
1232     for ( i=0; i<mbs; i++ ) {
1233       n  = ii[1] - ii[0]; ii++;
1234       xb = x + i; x1 = xb[0];
1235       ib = idx + ai[i];
1236       for ( j=0; j<n; j++ ) {
1237         rval    = ib[j];
1238         z[rval] += *v++ * x1;
1239       }
1240     }
1241     break;
1242   case 2:
1243     for ( i=0; i<mbs; i++ ) {
1244       n  = ii[1] - ii[0]; ii++;
1245       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1246       ib = idx + ai[i];
1247       for ( j=0; j<n; j++ ) {
1248         rval      = ib[j]*2;
1249         z[rval++] += v[0]*x1 + v[1]*x2;
1250         z[rval++] += v[2]*x1 + v[3]*x2;
1251         v += 4;
1252       }
1253     }
1254     break;
1255   case 3:
1256     for ( i=0; i<mbs; i++ ) {
1257       n  = ii[1] - ii[0]; ii++;
1258       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1259       ib = idx + ai[i];
1260       for ( j=0; j<n; j++ ) {
1261         rval      = ib[j]*3;
1262         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1263         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1264         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1265         v += 9;
1266       }
1267     }
1268     break;
1269   case 4:
1270     for ( i=0; i<mbs; i++ ) {
1271       n  = ii[1] - ii[0]; ii++;
1272       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1273       ib = idx + ai[i];
1274       for ( j=0; j<n; j++ ) {
1275         rval      = ib[j]*4;
1276         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1277         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1278         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1279         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1280         v += 16;
1281       }
1282     }
1283     break;
1284   case 5:
1285     for ( i=0; i<mbs; i++ ) {
1286       n  = ii[1] - ii[0]; ii++;
1287       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1288       x4 = xb[3];   x5 = xb[4];
1289       ib = idx + ai[i];
1290       for ( j=0; j<n; j++ ) {
1291         rval      = ib[j]*5;
1292         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1293         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1294         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1295         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1296         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1297         v += 25;
1298       }
1299     }
1300     break;
1301       /* block sizes larger then 5 by 5 are handled by BLAS */
1302     default: {
1303       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1304       if (!a->mult_work) {
1305         k = PetscMax(a->m,a->n);
1306         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1307         CHKPTRQ(a->mult_work);
1308       }
1309       work = a->mult_work;
1310       for ( i=0; i<mbs; i++ ) {
1311         n     = ii[1] - ii[0]; ii++;
1312         ncols = n*bs;
1313         PetscMemzero(work,ncols*sizeof(Scalar));
1314         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1315         v += n*bs2;
1316         x += bs;
1317         workt = work;
1318         for ( j=0; j<n; j++ ) {
1319           zb = z + bs*(*idx++);
1320           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1321           workt += bs;
1322         }
1323       }
1324     }
1325   }
1326   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1327   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1328   PLogFlops(2*a->nz*a->bs2);
1329   return 0;
1330 }
1331 
1332 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1333 {
1334   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1335 
1336   info->rows_global    = (double)a->m;
1337   info->columns_global = (double)a->n;
1338   info->rows_local     = (double)a->m;
1339   info->columns_local  = (double)a->n;
1340   info->block_size     = a->bs2;
1341   info->nz_allocated   = a->maxnz;
1342   info->nz_used        = a->bs2*a->nz;
1343   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1344   /*  if (info->nz_unneeded != A->info.nz_unneeded)
1345     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
1346   info->assemblies   = A->num_ass;
1347   info->mallocs      = a->reallocs;
1348   info->memory       = A->mem;
1349   if (A->factor) {
1350     info->fill_ratio_given  = A->info.fill_ratio_given;
1351     info->fill_ratio_needed = A->info.fill_ratio_needed;
1352     info->factor_mallocs    = A->info.factor_mallocs;
1353   } else {
1354     info->fill_ratio_given  = 0;
1355     info->fill_ratio_needed = 0;
1356     info->factor_mallocs    = 0;
1357   }
1358   return 0;
1359 }
1360 
1361 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1362 {
1363   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1364 
1365   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
1366 
1367   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1368   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1369       (a->nz != b->nz)) {
1370     *flg = PETSC_FALSE; return 0;
1371   }
1372 
1373   /* if the a->i are the same */
1374   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1375     *flg = PETSC_FALSE; return 0;
1376   }
1377 
1378   /* if a->j are the same */
1379   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1380     *flg = PETSC_FALSE; return 0;
1381   }
1382 
1383   /* if a->a are the same */
1384   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1385     *flg = PETSC_FALSE; return 0;
1386   }
1387   *flg = PETSC_TRUE;
1388   return 0;
1389 
1390 }
1391 
1392 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1393 {
1394   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1395   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1396   Scalar      *x, zero = 0.0,*aa,*aa_j;
1397 
1398   bs   = a->bs;
1399   aa   = a->a;
1400   ai   = a->i;
1401   aj   = a->j;
1402   ambs = a->mbs;
1403   bs2  = a->bs2;
1404 
1405   VecSet(&zero,v);
1406   VecGetArray(v,&x); VecGetLocalSize(v,&n);
1407   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
1408   for ( i=0; i<ambs; i++ ) {
1409     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1410       if (aj[j] == i) {
1411         row  = i*bs;
1412         aa_j = aa+j*bs2;
1413         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1414         break;
1415       }
1416     }
1417   }
1418   return 0;
1419 }
1420 
1421 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1422 {
1423   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1424   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1425   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1426 
1427   ai  = a->i;
1428   aj  = a->j;
1429   aa  = a->a;
1430   m   = a->m;
1431   n   = a->n;
1432   bs  = a->bs;
1433   mbs = a->mbs;
1434   bs2 = a->bs2;
1435   if (ll) {
1436     VecGetArray(ll,&l); VecGetSize(ll,&lm);
1437     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
1438     for ( i=0; i<mbs; i++ ) { /* for each block row */
1439       M  = ai[i+1] - ai[i];
1440       li = l + i*bs;
1441       v  = aa + bs2*ai[i];
1442       for ( j=0; j<M; j++ ) { /* for each block */
1443         for ( k=0; k<bs2; k++ ) {
1444           (*v++) *= li[k%bs];
1445         }
1446       }
1447     }
1448   }
1449 
1450   if (rr) {
1451     VecGetArray(rr,&r); VecGetSize(rr,&rn);
1452     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
1453     for ( i=0; i<mbs; i++ ) { /* for each block row */
1454       M  = ai[i+1] - ai[i];
1455       v  = aa + bs2*ai[i];
1456       for ( j=0; j<M; j++ ) { /* for each block */
1457         ri = r + bs*aj[ai[i]+j];
1458         for ( k=0; k<bs; k++ ) {
1459           x = ri[k];
1460           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1461         }
1462       }
1463     }
1464   }
1465   return 0;
1466 }
1467 
1468 
1469 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1470 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1471 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1472 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1473 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1474 
1475 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1476 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1477 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1478 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1479 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1480 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1481 
1482 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1483 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1484 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1485 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1486 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1487 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1488 
1489 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1490 {
1491   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1492   Scalar      *v = a->a;
1493   double      sum = 0.0;
1494   int         i,nz=a->nz,bs2=a->bs2;
1495 
1496   if (type == NORM_FROBENIUS) {
1497     for (i=0; i< bs2*nz; i++ ) {
1498 #if defined(PETSC_COMPLEX)
1499       sum += real(conj(*v)*(*v)); v++;
1500 #else
1501       sum += (*v)*(*v); v++;
1502 #endif
1503     }
1504     *norm = sqrt(sum);
1505   }
1506   else {
1507     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
1508   }
1509   return 0;
1510 }
1511 
1512 /*
1513      note: This can only work for identity for row and col. It would
1514    be good to check this and otherwise generate an error.
1515 */
1516 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1517 {
1518   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1519   Mat         outA;
1520   int         ierr;
1521 
1522   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
1523 
1524   outA          = inA;
1525   inA->factor   = FACTOR_LU;
1526   a->row        = row;
1527   a->col        = col;
1528 
1529   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1530 
1531   if (!a->diag) {
1532     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1533   }
1534   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1535   return 0;
1536 }
1537 
1538 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1539 {
1540   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1541   int         one = 1, totalnz = a->bs2*a->nz;
1542   BLscal_( &totalnz, alpha, a->a, &one );
1543   PLogFlops(totalnz);
1544   return 0;
1545 }
1546 
1547 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1548 {
1549   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1550   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1551   int        *ai = a->i, *ailen = a->ilen;
1552   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1553   Scalar     *ap, *aa = a->a, zero = 0.0;
1554 
1555   for ( k=0; k<m; k++ ) { /* loop over rows */
1556     row  = im[k]; brow = row/bs;
1557     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
1558     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1559     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1560     nrow = ailen[brow];
1561     for ( l=0; l<n; l++ ) { /* loop over columns */
1562       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1563       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1564       col  = in[l] ;
1565       bcol = col/bs;
1566       cidx = col%bs;
1567       ridx = row%bs;
1568       high = nrow;
1569       low  = 0; /* assume unsorted */
1570       while (high-low > 5) {
1571         t = (low+high)/2;
1572         if (rp[t] > bcol) high = t;
1573         else             low  = t;
1574       }
1575       for ( i=low; i<high; i++ ) {
1576         if (rp[i] > bcol) break;
1577         if (rp[i] == bcol) {
1578           *v++ = ap[bs2*i+bs*cidx+ridx];
1579           goto finished;
1580         }
1581       }
1582       *v++ = zero;
1583       finished:;
1584     }
1585   }
1586   return 0;
1587 }
1588 
1589 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1590 {
1591   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1592   *bs = baij->bs;
1593   return 0;
1594 }
1595 
1596 /* idx should be of length atlease bs */
1597 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1598 {
1599   int i,row;
1600   row = idx[0];
1601   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1602 
1603   for ( i=1; i<bs; i++ ) {
1604     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1605   }
1606   *flg = PETSC_TRUE;
1607   return 0;
1608 }
1609 
1610 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1611 {
1612   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1613   IS          is_local;
1614   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1615   PetscTruth  flg;
1616   Scalar      zero = 0.0,*aa;
1617 
1618   /* Make a copy of the IS and  sort it */
1619   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1620   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1621   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1622   ierr = ISSort(is_local); CHKERRQ(ierr);
1623   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1624 
1625   i = 0;
1626   while (i < is_n) {
1627     if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range");
1628     flg = PETSC_FALSE;
1629     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1630     if (flg) { /* There exists a block of rows to be Zerowed */
1631       baij->ilen[rows[i]/bs] = 0;
1632       i += bs;
1633     } else { /* Zero out only the requested row */
1634       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1635       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1636       for ( j=0; j<count; j++ ) {
1637         aa[0] = zero;
1638         aa+=bs;
1639       }
1640       i++;
1641     }
1642   }
1643   if (diag) {
1644     for ( j=0; j<is_n; j++ ) {
1645       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1646     }
1647   }
1648   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1649   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1650   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1651   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1652   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1653 
1654   return 0;
1655 }
1656 int MatPrintHelp_SeqBAIJ(Mat A)
1657 {
1658   static int called = 0;
1659   MPI_Comm   comm = A->comm;
1660 
1661   if (called) return 0; else called = 1;
1662   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1663   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1664   return 0;
1665 }
1666 
1667 /* -------------------------------------------------------------------*/
1668 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1669        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1670        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1671        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1672        MatSolve_SeqBAIJ_N,0,
1673        0,0,
1674        MatLUFactor_SeqBAIJ,0,
1675        0,
1676        MatTranspose_SeqBAIJ,
1677        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1678        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1679        0,MatAssemblyEnd_SeqBAIJ,
1680        0,
1681        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
1682        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1683        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1684        MatILUFactorSymbolic_SeqBAIJ,0,
1685        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1686        MatConvertSameType_SeqBAIJ,0,0,
1687        MatILUFactor_SeqBAIJ,0,0,
1688        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
1689        MatGetValues_SeqBAIJ,0,
1690        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1691        0,0,0,MatGetBlockSize_SeqBAIJ,
1692        MatGetRowIJ_SeqBAIJ,
1693        MatRestoreRowIJ_SeqBAIJ};
1694 
1695 /*@C
1696    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1697    compressed row) format.  For good matrix assembly performance the
1698    user should preallocate the matrix storage by setting the parameter nz
1699    (or the array nzz).  By setting these parameters accurately, performance
1700    during matrix assembly can be increased by more than a factor of 50.
1701 
1702    Input Parameters:
1703 .  comm - MPI communicator, set to MPI_COMM_SELF
1704 .  bs - size of block
1705 .  m - number of rows
1706 .  n - number of columns
1707 .  nz - number of block nonzeros per block row (same for all rows)
1708 .  nzz - array containing the number of block nonzeros in the various block rows
1709          (possibly different for each block row) or PETSC_NULL
1710 
1711    Output Parameter:
1712 .  A - the matrix
1713 
1714    Notes:
1715    The block AIJ format is fully compatible with standard Fortran 77
1716    storage.  That is, the stored row and column indices can begin at
1717    either one (as in Fortran) or zero.  See the users' manual for details.
1718 
1719    Specify the preallocated storage with either nz or nnz (not both).
1720    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1721    allocation.  For additional details, see the users manual chapter on
1722    matrices and the file $(PETSC_DIR)/Performance.
1723 
1724 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1725 @*/
1726 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1727 {
1728   Mat         B;
1729   Mat_SeqBAIJ *b;
1730   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
1731 
1732   MPI_Comm_size(comm,&size);
1733   if (size > 1) SETERRQ(1,"MatCreateSeqBAIJ:Comm must be of size 1");
1734 
1735   if (mbs*bs!=m || nbs*bs!=n)
1736     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1737 
1738   *A = 0;
1739   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1740   PLogObjectCreate(B);
1741   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1742   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1743   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1744   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1745   if (!flg) {
1746     switch (bs) {
1747       case 1:
1748         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1749         B->ops.solve           = MatSolve_SeqBAIJ_1;
1750         B->ops.mult            = MatMult_SeqBAIJ_1;
1751         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
1752        break;
1753       case 2:
1754         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1755         B->ops.solve           = MatSolve_SeqBAIJ_2;
1756         B->ops.mult            = MatMult_SeqBAIJ_2;
1757         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
1758         break;
1759       case 3:
1760         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1761         B->ops.solve           = MatSolve_SeqBAIJ_3;
1762         B->ops.mult            = MatMult_SeqBAIJ_3;
1763         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
1764         break;
1765       case 4:
1766         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1767         B->ops.solve           = MatSolve_SeqBAIJ_4;
1768         B->ops.mult            = MatMult_SeqBAIJ_4;
1769         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
1770         break;
1771       case 5:
1772         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1773         B->ops.solve           = MatSolve_SeqBAIJ_5;
1774         B->ops.mult            = MatMult_SeqBAIJ_5;
1775         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
1776         break;
1777     }
1778   }
1779   B->destroy          = MatDestroy_SeqBAIJ;
1780   B->view             = MatView_SeqBAIJ;
1781   B->factor           = 0;
1782   B->lupivotthreshold = 1.0;
1783   b->row              = 0;
1784   b->col              = 0;
1785   b->reallocs         = 0;
1786 
1787   b->m       = m; B->m = m; B->M = m;
1788   b->n       = n; B->n = n; B->N = n;
1789   b->mbs     = mbs;
1790   b->nbs     = nbs;
1791   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1792   if (nnz == PETSC_NULL) {
1793     if (nz == PETSC_DEFAULT) nz = 5;
1794     else if (nz <= 0)        nz = 1;
1795     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1796     nz = nz*mbs;
1797   }
1798   else {
1799     nz = 0;
1800     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1801   }
1802 
1803   /* allocate the matrix space */
1804   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1805   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1806   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1807   b->j  = (int *) (b->a + nz*bs2);
1808   PetscMemzero(b->j,nz*sizeof(int));
1809   b->i  = b->j + nz;
1810   b->singlemalloc = 1;
1811 
1812   b->i[0] = 0;
1813   for (i=1; i<mbs+1; i++) {
1814     b->i[i] = b->i[i-1] + b->imax[i-1];
1815   }
1816 
1817   /* b->ilen will count nonzeros in each block row so far. */
1818   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1819   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1820   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1821 
1822   b->bs               = bs;
1823   b->bs2              = bs2;
1824   b->mbs              = mbs;
1825   b->nz               = 0;
1826   b->maxnz            = nz*bs2;
1827   b->sorted           = 0;
1828   b->roworiented      = 1;
1829   b->nonew            = 0;
1830   b->diag             = 0;
1831   b->solve_work       = 0;
1832   b->mult_work        = 0;
1833   b->spptr            = 0;
1834   B->info.nz_unneeded = (double)b->maxnz;
1835 
1836   *A = B;
1837   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1838   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1839   return 0;
1840 }
1841 
1842 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1843 {
1844   Mat         C;
1845   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1846   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1847 
1848   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1849 
1850   *B = 0;
1851   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1852   PLogObjectCreate(C);
1853   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1854   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1855   C->destroy    = MatDestroy_SeqBAIJ;
1856   C->view       = MatView_SeqBAIJ;
1857   C->factor     = A->factor;
1858   c->row        = 0;
1859   c->col        = 0;
1860   C->assembled  = PETSC_TRUE;
1861 
1862   c->m = C->m   = a->m;
1863   c->n = C->n   = a->n;
1864   C->M          = a->m;
1865   C->N          = a->n;
1866 
1867   c->bs         = a->bs;
1868   c->bs2        = a->bs2;
1869   c->mbs        = a->mbs;
1870   c->nbs        = a->nbs;
1871 
1872   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1873   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1874   for ( i=0; i<mbs; i++ ) {
1875     c->imax[i] = a->imax[i];
1876     c->ilen[i] = a->ilen[i];
1877   }
1878 
1879   /* allocate the matrix space */
1880   c->singlemalloc = 1;
1881   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1882   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1883   c->j  = (int *) (c->a + nz*bs2);
1884   c->i  = c->j + nz;
1885   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1886   if (mbs > 0) {
1887     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1888     if (cpvalues == COPY_VALUES) {
1889       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1890     }
1891   }
1892 
1893   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1894   c->sorted      = a->sorted;
1895   c->roworiented = a->roworiented;
1896   c->nonew       = a->nonew;
1897 
1898   if (a->diag) {
1899     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1900     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1901     for ( i=0; i<mbs; i++ ) {
1902       c->diag[i] = a->diag[i];
1903     }
1904   }
1905   else c->diag          = 0;
1906   c->nz                 = a->nz;
1907   c->maxnz              = a->maxnz;
1908   c->solve_work         = 0;
1909   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1910   c->mult_work          = 0;
1911   *B = C;
1912   return 0;
1913 }
1914 
1915 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1916 {
1917   Mat_SeqBAIJ  *a;
1918   Mat          B;
1919   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1920   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1921   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1922   int          *masked, nmask,tmp,bs2,ishift;
1923   Scalar       *aa;
1924   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1925 
1926   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1927   bs2  = bs*bs;
1928 
1929   MPI_Comm_size(comm,&size);
1930   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1931   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1932   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1933   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1934   M = header[1]; N = header[2]; nz = header[3];
1935 
1936   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1937 
1938   /*
1939      This code adds extra rows to make sure the number of rows is
1940     divisible by the blocksize
1941   */
1942   mbs        = M/bs;
1943   extra_rows = bs - M + bs*(mbs);
1944   if (extra_rows == bs) extra_rows = 0;
1945   else                  mbs++;
1946   if (extra_rows) {
1947     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1948   }
1949 
1950   /* read in row lengths */
1951   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1952   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1953   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1954 
1955   /* read in column indices */
1956   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1957   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1958   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1959 
1960   /* loop over row lengths determining block row lengths */
1961   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1962   PetscMemzero(browlengths,mbs*sizeof(int));
1963   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1964   PetscMemzero(mask,mbs*sizeof(int));
1965   masked = mask + mbs;
1966   rowcount = 0; nzcount = 0;
1967   for ( i=0; i<mbs; i++ ) {
1968     nmask = 0;
1969     for ( j=0; j<bs; j++ ) {
1970       kmax = rowlengths[rowcount];
1971       for ( k=0; k<kmax; k++ ) {
1972         tmp = jj[nzcount++]/bs;
1973         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1974       }
1975       rowcount++;
1976     }
1977     browlengths[i] += nmask;
1978     /* zero out the mask elements we set */
1979     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1980   }
1981 
1982   /* create our matrix */
1983   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1984          CHKERRQ(ierr);
1985   B = *A;
1986   a = (Mat_SeqBAIJ *) B->data;
1987 
1988   /* set matrix "i" values */
1989   a->i[0] = 0;
1990   for ( i=1; i<= mbs; i++ ) {
1991     a->i[i]      = a->i[i-1] + browlengths[i-1];
1992     a->ilen[i-1] = browlengths[i-1];
1993   }
1994   a->nz         = 0;
1995   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1996 
1997   /* read in nonzero values */
1998   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1999   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
2000   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2001 
2002   /* set "a" and "j" values into matrix */
2003   nzcount = 0; jcount = 0;
2004   for ( i=0; i<mbs; i++ ) {
2005     nzcountb = nzcount;
2006     nmask    = 0;
2007     for ( j=0; j<bs; j++ ) {
2008       kmax = rowlengths[i*bs+j];
2009       for ( k=0; k<kmax; k++ ) {
2010         tmp = jj[nzcount++]/bs;
2011 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2012       }
2013       rowcount++;
2014     }
2015     /* sort the masked values */
2016     PetscSortInt(nmask,masked);
2017 
2018     /* set "j" values into matrix */
2019     maskcount = 1;
2020     for ( j=0; j<nmask; j++ ) {
2021       a->j[jcount++]  = masked[j];
2022       mask[masked[j]] = maskcount++;
2023     }
2024     /* set "a" values into matrix */
2025     ishift = bs2*a->i[i];
2026     for ( j=0; j<bs; j++ ) {
2027       kmax = rowlengths[i*bs+j];
2028       for ( k=0; k<kmax; k++ ) {
2029         tmp    = jj[nzcountb]/bs ;
2030         block  = mask[tmp] - 1;
2031         point  = jj[nzcountb] - bs*tmp;
2032         idx    = ishift + bs2*block + j + bs*point;
2033         a->a[idx] = aa[nzcountb++];
2034       }
2035     }
2036     /* zero out the mask elements we set */
2037     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2038   }
2039   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
2040 
2041   PetscFree(rowlengths);
2042   PetscFree(browlengths);
2043   PetscFree(aa);
2044   PetscFree(jj);
2045   PetscFree(mask);
2046 
2047   B->assembled = PETSC_TRUE;
2048 
2049   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2050   if (flg) {
2051     Viewer tviewer;
2052     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2053     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
2054     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2055     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2056   }
2057   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2058   if (flg) {
2059     Viewer tviewer;
2060     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2061     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
2062     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2063     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2064   }
2065   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2066   if (flg) {
2067     Viewer tviewer;
2068     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2069     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2070     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2071   }
2072   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2073   if (flg) {
2074     Viewer tviewer;
2075     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2076     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
2077     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2078     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2079   }
2080   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2081   if (flg) {
2082     Viewer tviewer;
2083     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
2084     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
2085     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
2086     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2087   }
2088   return 0;
2089 }
2090 
2091 
2092 
2093