xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 4e126f44ee86c2181996ac598c997002e40713ba)
1 #ifndef lint
2 static char vcid[] = "$Id: baij.c,v 1.72 1996/11/26 16:23:34 bsmith Exp curfman $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "src/mat/impls/baij/seq/baij.h"
10 #include "src/vec/vecimpl.h"
11 #include "src/inline/spops.h"
12 #include "petsc.h"
13 
14 
15 /*
16      Adds diagonal pointers to sparse matrix structure.
17 */
18 
19 int MatMarkDiag_SeqBAIJ(Mat A)
20 {
21   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
22   int         i,j, *diag, m = a->mbs;
23 
24   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
25   PLogObjectMemory(A,(m+1)*sizeof(int));
26   for ( i=0; i<m; i++ ) {
27     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
28       if (a->j[j] == i) {
29         diag[i] = j;
30         break;
31       }
32     }
33   }
34   a->diag = diag;
35   return 0;
36 }
37 
38 #include "draw.h"
39 #include "pinclude/pviewer.h"
40 #include "sys.h"
41 
42 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
43 
44 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
45                             PetscTruth *done)
46 {
47   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
48   int         ierr, n = a->mbs,i;
49 
50   *nn = n;
51   if (!ia) return 0;
52   if (symmetric) {
53     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
54   } else if (oshift == 1) {
55     /* temporarily add 1 to i and j indices */
56     int nz = a->i[n] + 1;
57     for ( i=0; i<nz; i++ ) a->j[i]++;
58     for ( i=0; i<n+1; i++ ) a->i[i]++;
59     *ia = a->i; *ja = a->j;
60   } else {
61     *ia = a->i; *ja = a->j;
62   }
63 
64   return 0;
65 }
66 
67 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
68                                 PetscTruth *done)
69 {
70   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
71   int         i,n = a->mbs;
72 
73   if (!ia) return 0;
74   if (symmetric) {
75     PetscFree(*ia);
76     PetscFree(*ja);
77   } else if (oshift == 1) {
78     int nz = a->i[n];
79     for ( i=0; i<nz; i++ ) a->j[i]--;
80     for ( i=0; i<n+1; i++ ) a->i[i]--;
81   }
82   return 0;
83 }
84 
85 
86 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
87 {
88   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
89   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
90   Scalar      *aa;
91 
92   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
93   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
94   col_lens[0] = MAT_COOKIE;
95 
96   col_lens[1] = a->m;
97   col_lens[2] = a->n;
98   col_lens[3] = a->nz*bs2;
99 
100   /* store lengths of each row and write (including header) to file */
101   count = 0;
102   for ( i=0; i<a->mbs; i++ ) {
103     for ( j=0; j<bs; j++ ) {
104       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
105     }
106   }
107   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
108   PetscFree(col_lens);
109 
110   /* store column indices (zero start index) */
111   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
112   count = 0;
113   for ( i=0; i<a->mbs; i++ ) {
114     for ( j=0; j<bs; j++ ) {
115       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
116         for ( l=0; l<bs; l++ ) {
117           jj[count++] = bs*a->j[k] + l;
118         }
119       }
120     }
121   }
122   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
123   PetscFree(jj);
124 
125   /* store nonzero values */
126   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
127   count = 0;
128   for ( i=0; i<a->mbs; i++ ) {
129     for ( j=0; j<bs; j++ ) {
130       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
131         for ( l=0; l<bs; l++ ) {
132           aa[count++] = a->a[bs2*k + l*bs + j];
133         }
134       }
135     }
136   }
137   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
138   PetscFree(aa);
139   return 0;
140 }
141 
142 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
143 {
144   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
145   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
146   FILE        *fd;
147   char        *outputname;
148 
149   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
150   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
151   ierr = ViewerGetFormat(viewer,&format);
152   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
153     fprintf(fd,"  block size is %d\n",bs);
154   }
155   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
156     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
157   }
158   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
159     for ( i=0; i<a->mbs; i++ ) {
160       for ( j=0; j<bs; j++ ) {
161         fprintf(fd,"row %d:",i*bs+j);
162         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
163           for ( l=0; l<bs; l++ ) {
164 #if defined(PETSC_COMPLEX)
165           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
166             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
167               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
168           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
169             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
170 #else
171           if (a->a[bs2*k + l*bs + j] != 0.0)
172             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
173 #endif
174           }
175         }
176         fprintf(fd,"\n");
177       }
178     }
179   }
180   else {
181     for ( i=0; i<a->mbs; i++ ) {
182       for ( j=0; j<bs; j++ ) {
183         fprintf(fd,"row %d:",i*bs+j);
184         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
185           for ( l=0; l<bs; l++ ) {
186 #if defined(PETSC_COMPLEX)
187           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
188             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
189               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
190           }
191           else {
192             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
193           }
194 #else
195             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
196 #endif
197           }
198         }
199         fprintf(fd,"\n");
200       }
201     }
202   }
203   fflush(fd);
204   return 0;
205 }
206 
207 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
208 {
209   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
210   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
211   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
212   Scalar       *aa;
213   Draw         draw;
214   DrawButton   button;
215   PetscTruth   isnull;
216 
217   ViewerDrawGetDraw(viewer,&draw);
218   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
219 
220   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
221   xr += w;    yr += h;  xl = -w;     yl = -h;
222   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
223   /* loop over matrix elements drawing boxes */
224   color = DRAW_BLUE;
225   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
226     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
227       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
228       x_l = a->j[j]*bs; x_r = x_l + 1.0;
229       aa = a->a + j*bs2;
230       for ( k=0; k<bs; k++ ) {
231         for ( l=0; l<bs; l++ ) {
232           if (PetscReal(*aa++) >=  0.) continue;
233           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
234         }
235       }
236     }
237   }
238   color = DRAW_CYAN;
239   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
240     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
241       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
242       x_l = a->j[j]*bs; x_r = x_l + 1.0;
243       aa = a->a + j*bs2;
244       for ( k=0; k<bs; k++ ) {
245         for ( l=0; l<bs; l++ ) {
246           if (PetscReal(*aa++) != 0.) continue;
247           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
248         }
249       }
250     }
251   }
252 
253   color = DRAW_RED;
254   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
255     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
256       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
257       x_l = a->j[j]*bs; x_r = x_l + 1.0;
258       aa = a->a + j*bs2;
259       for ( k=0; k<bs; k++ ) {
260         for ( l=0; l<bs; l++ ) {
261           if (PetscReal(*aa++) <= 0.) continue;
262           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
263         }
264       }
265     }
266   }
267 
268   DrawFlush(draw);
269   DrawGetPause(draw,&pause);
270   if (pause >= 0) { PetscSleep(pause); return 0;}
271 
272   /* allow the matrix to zoom or shrink */
273   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
274   while (button != BUTTON_RIGHT) {
275     DrawClear(draw);
276     if (button == BUTTON_LEFT) scale = .5;
277     else if (button == BUTTON_CENTER) scale = 2.;
278     xl = scale*(xl + w - xc) + xc - w*scale;
279     xr = scale*(xr - w - xc) + xc + w*scale;
280     yl = scale*(yl + h - yc) + yc - h*scale;
281     yr = scale*(yr - h - yc) + yc + h*scale;
282     w *= scale; h *= scale;
283     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
284     color = DRAW_BLUE;
285     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
286       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
287         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
288         x_l = a->j[j]*bs; x_r = x_l + 1.0;
289         aa = a->a + j*bs2;
290         for ( k=0; k<bs; k++ ) {
291           for ( l=0; l<bs; l++ ) {
292             if (PetscReal(*aa++) >=  0.) continue;
293             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
294           }
295         }
296       }
297     }
298     color = DRAW_CYAN;
299     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
300       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
301         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
302         x_l = a->j[j]*bs; x_r = x_l + 1.0;
303         aa = a->a + j*bs2;
304         for ( k=0; k<bs; k++ ) {
305           for ( l=0; l<bs; l++ ) {
306           if (PetscReal(*aa++) != 0.) continue;
307           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
308           }
309         }
310       }
311     }
312 
313     color = DRAW_RED;
314     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
315       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
316         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
317         x_l = a->j[j]*bs; x_r = x_l + 1.0;
318         aa = a->a + j*bs2;
319         for ( k=0; k<bs; k++ ) {
320           for ( l=0; l<bs; l++ ) {
321             if (PetscReal(*aa++) <= 0.) continue;
322             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
323           }
324         }
325       }
326     }
327     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
328   }
329   return 0;
330 }
331 
332 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
333 {
334   Mat         A = (Mat) obj;
335   ViewerType  vtype;
336   int         ierr;
337 
338   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
339   if (vtype == MATLAB_VIEWER) {
340     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
341   }
342   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
343     return MatView_SeqBAIJ_ASCII(A,viewer);
344   }
345   else if (vtype == BINARY_FILE_VIEWER) {
346     return MatView_SeqBAIJ_Binary(A,viewer);
347   }
348   else if (vtype == DRAW_VIEWER) {
349     return MatView_SeqBAIJ_Draw(A,viewer);
350   }
351   return 0;
352 }
353 
354 #define CHUNKSIZE  10
355 
356 /* This version has row oriented v  */
357 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
358 {
359   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
360   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
361   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
362   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
363   int          ridx,cidx,bs2=a->bs2;
364   Scalar      *ap,value,*aa=a->a,*bap;
365 
366   for ( k=0; k<m; k++ ) { /* loop over added rows */
367     row  = im[k]; brow = row/bs;
368 #if defined(PETSC_BOPT_g)
369     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
370     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
371 #endif
372     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
373     rmax = imax[brow]; nrow = ailen[brow];
374     low = 0;
375     for ( l=0; l<n; l++ ) { /* loop over added columns */
376 #if defined(PETSC_BOPT_g)
377       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
378       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
379 #endif
380       col = in[l]; bcol = col/bs;
381       ridx = row % bs; cidx = col % bs;
382       if (roworiented) {
383         value = *v++;
384       }
385       else {
386         value = v[k + l*m];
387       }
388       if (!sorted) low = 0; high = nrow;
389       while (high-low > 5) {
390         t = (low+high)/2;
391         if (rp[t] > bcol) high = t;
392         else              low  = t;
393       }
394       for ( i=low; i<high; i++ ) {
395         if (rp[i] > bcol) break;
396         if (rp[i] == bcol) {
397           bap  = ap +  bs2*i + bs*cidx + ridx;
398           if (is == ADD_VALUES) *bap += value;
399           else                  *bap  = value;
400           goto noinsert;
401         }
402       }
403       if (nonew) goto noinsert;
404       if (nrow >= rmax) {
405         /* there is no extra room in row, therefore enlarge */
406         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
407         Scalar *new_a;
408 
409         /* malloc new storage space */
410         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
411         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
412         new_j   = (int *) (new_a + bs2*new_nz);
413         new_i   = new_j + new_nz;
414 
415         /* copy over old data into new slots */
416         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
417         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
418         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
419         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
420         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
421                                                            len*sizeof(int));
422         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
423         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
424         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
425                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
426         /* free up old matrix storage */
427         PetscFree(a->a);
428         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
429         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
430         a->singlemalloc = 1;
431 
432         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
433         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
434         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
435         a->maxnz += bs2*CHUNKSIZE;
436         a->reallocs++;
437         a->nz++;
438       }
439       N = nrow++ - 1;
440       /* shift up all the later entries in this row */
441       for ( ii=N; ii>=i; ii-- ) {
442         rp[ii+1] = rp[ii];
443         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
444       }
445       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
446       rp[i]                      = bcol;
447       ap[bs2*i + bs*cidx + ridx] = value;
448       noinsert:;
449       low = i;
450     }
451     ailen[brow] = nrow;
452   }
453   return 0;
454 }
455 
456 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
457 {
458   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
459   *m = a->m; *n = a->n;
460   return 0;
461 }
462 
463 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
464 {
465   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
466   *m = 0; *n = a->m;
467   return 0;
468 }
469 
470 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
471 {
472   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
473   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
474   Scalar      *aa,*v_i,*aa_i;
475 
476   bs  = a->bs;
477   ai  = a->i;
478   aj  = a->j;
479   aa  = a->a;
480   bs2 = a->bs2;
481 
482   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
483 
484   bn  = row/bs;   /* Block number */
485   bp  = row % bs; /* Block Position */
486   M   = ai[bn+1] - ai[bn];
487   *nz = bs*M;
488 
489   if (v) {
490     *v = 0;
491     if (*nz) {
492       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
493       for ( i=0; i<M; i++ ) { /* for each block in the block row */
494         v_i  = *v + i*bs;
495         aa_i = aa + bs2*(ai[bn] + i);
496         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
497       }
498     }
499   }
500 
501   if (idx) {
502     *idx = 0;
503     if (*nz) {
504       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
505       for ( i=0; i<M; i++ ) { /* for each block in the block row */
506         idx_i = *idx + i*bs;
507         itmp  = bs*aj[ai[bn] + i];
508         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
509       }
510     }
511   }
512   return 0;
513 }
514 
515 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
516 {
517   if (idx) {if (*idx) PetscFree(*idx);}
518   if (v)   {if (*v)   PetscFree(*v);}
519   return 0;
520 }
521 
522 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
523 {
524   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
525   Mat         C;
526   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
527   int         *rows,*cols,bs2=a->bs2;
528   Scalar      *array=a->a;
529 
530   if (B==PETSC_NULL && mbs!=nbs)
531     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
532   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
533   PetscMemzero(col,(1+nbs)*sizeof(int));
534 
535   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
536   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
537   PetscFree(col);
538   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
539   cols = rows + bs;
540   for ( i=0; i<mbs; i++ ) {
541     cols[0] = i*bs;
542     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
543     len = ai[i+1] - ai[i];
544     for ( j=0; j<len; j++ ) {
545       rows[0] = (*aj++)*bs;
546       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
547       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
548       array += bs2;
549     }
550   }
551   PetscFree(rows);
552 
553   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
554   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
555 
556   if (B != PETSC_NULL) {
557     *B = C;
558   } else {
559     /* This isn't really an in-place transpose */
560     PetscFree(a->a);
561     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
562     if (a->diag) PetscFree(a->diag);
563     if (a->ilen) PetscFree(a->ilen);
564     if (a->imax) PetscFree(a->imax);
565     if (a->solve_work) PetscFree(a->solve_work);
566     PetscFree(a);
567     PetscMemcpy(A,C,sizeof(struct _Mat));
568     PetscHeaderDestroy(C);
569   }
570   return 0;
571 }
572 
573 
574 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
575 {
576   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
577   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
578   int        m = a->m,*ip, N, *ailen = a->ilen;
579   int        mbs = a->mbs, bs2 = a->bs2;
580   Scalar     *aa = a->a, *ap;
581 
582   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
583 
584   for ( i=1; i<mbs; i++ ) {
585     /* move each row back by the amount of empty slots (fshift) before it*/
586     fshift += imax[i-1] - ailen[i-1];
587     if (fshift) {
588       ip = aj + ai[i]; ap = aa + bs2*ai[i];
589       N = ailen[i];
590       for ( j=0; j<N; j++ ) {
591         ip[j-fshift] = ip[j];
592         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
593       }
594     }
595     ai[i] = ai[i-1] + ailen[i-1];
596   }
597   if (mbs) {
598     fshift += imax[mbs-1] - ailen[mbs-1];
599     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
600   }
601   /* reset ilen and imax for each row */
602   for ( i=0; i<mbs; i++ ) {
603     ailen[i] = imax[i] = ai[i+1] - ai[i];
604   }
605   a->nz = ai[mbs];
606 
607   /* diagonals may have moved, so kill the diagonal pointers */
608   if (fshift && a->diag) {
609     PetscFree(a->diag);
610     PLogObjectMemory(A,-(m+1)*sizeof(int));
611     a->diag = 0;
612   }
613   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneed storage space %d used %d, rows %d, block size %d\n",
614            fshift*bs2,a->nz*bs2,m,a->bs);
615   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n",
616            a->reallocs);
617   A->info.nz_unneeded  = (double)fshift*bs2;
618 
619   return 0;
620 }
621 
622 static int MatZeroEntries_SeqBAIJ(Mat A)
623 {
624   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
625   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
626   return 0;
627 }
628 
629 int MatDestroy_SeqBAIJ(PetscObject obj)
630 {
631   Mat         A  = (Mat) obj;
632   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
633   int         ierr;
634 
635 #if defined(PETSC_LOG)
636   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
637 #endif
638   PetscFree(a->a);
639   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
640   if (a->diag) PetscFree(a->diag);
641   if (a->ilen) PetscFree(a->ilen);
642   if (a->imax) PetscFree(a->imax);
643   if (a->solve_work) PetscFree(a->solve_work);
644   if (a->mult_work) PetscFree(a->mult_work);
645   PetscFree(a);
646   if (A->mapping) {
647     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
648   }
649   PLogObjectDestroy(A);
650   PetscHeaderDestroy(A);
651   return 0;
652 }
653 
654 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
655 {
656   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
657   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
658   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
659   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
660   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
661   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
662   else if (op == MAT_ROWS_SORTED ||
663            op == MAT_SYMMETRIC ||
664            op == MAT_STRUCTURALLY_SYMMETRIC ||
665            op == MAT_YES_NEW_DIAGONALS ||
666            op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES)
667     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
668   else if (op == MAT_NO_NEW_DIAGONALS)
669     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");}
670   else
671     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
672   return 0;
673 }
674 
675 
676 /* -------------------------------------------------------*/
677 /* Should check that shapes of vectors and matrices match */
678 /* -------------------------------------------------------*/
679 #include "pinclude/plapack.h"
680 
681 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
682 {
683   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
684   register Scalar *x,*z,*v,sum;
685   int             mbs=a->mbs,i,*idx,*ii,n;
686 
687   VecGetArray_Fast(xx,x);
688   VecGetArray_Fast(zz,z);
689 
690   idx   = a->j;
691   v     = a->a;
692   ii    = a->i;
693 
694   for ( i=0; i<mbs; i++ ) {
695     n    = ii[1] - ii[0]; ii++;
696     sum  = 0.0;
697     while (n--) sum += *v++ * x[*idx++];
698     z[i] = sum;
699   }
700   VecRestoreArray_Fast(xx,x);
701   VecRestoreArray_Fast(zz,z);
702   PLogFlops(2*a->nz - a->m);
703   return 0;
704 }
705 
706 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
707 {
708   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
709   register Scalar *x,*z,*v,*xb,sum1,sum2;
710   register Scalar x1,x2;
711   int             mbs=a->mbs,i,*idx,*ii;
712   int             j,n;
713 
714   VecGetArray_Fast(xx,x);
715   VecGetArray_Fast(zz,z);
716 
717   idx   = a->j;
718   v     = a->a;
719   ii    = a->i;
720 
721   for ( i=0; i<mbs; i++ ) {
722     n  = ii[1] - ii[0]; ii++;
723     sum1 = 0.0; sum2 = 0.0;
724     for ( j=0; j<n; j++ ) {
725       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
726       sum1 += v[0]*x1 + v[2]*x2;
727       sum2 += v[1]*x1 + v[3]*x2;
728       v += 4;
729     }
730     z[0] = sum1; z[1] = sum2;
731     z += 2;
732   }
733   VecRestoreArray_Fast(xx,x);
734   VecRestoreArray_Fast(zz,z);
735   PLogFlops(4*a->nz - a->m);
736   return 0;
737 }
738 
739 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
740 {
741   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
742   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
743   int             mbs=a->mbs,i,*idx,*ii,j,n;
744 
745   VecGetArray_Fast(xx,x);
746   VecGetArray_Fast(zz,z);
747 
748   idx   = a->j;
749   v     = a->a;
750   ii    = a->i;
751 
752   for ( i=0; i<mbs; i++ ) {
753     n  = ii[1] - ii[0]; ii++;
754     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
755     for ( j=0; j<n; j++ ) {
756       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
757       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
758       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
759       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
760       v += 9;
761     }
762     z[0] = sum1; z[1] = sum2; z[2] = sum3;
763     z += 3;
764   }
765   VecRestoreArray_Fast(xx,x);
766   VecRestoreArray_Fast(zz,z);
767   PLogFlops(18*a->nz - a->m);
768   return 0;
769 }
770 
771 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
772 {
773   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
774   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
775   register Scalar x1,x2,x3,x4;
776   int             mbs=a->mbs,i,*idx,*ii;
777   int             j,n;
778 
779   VecGetArray_Fast(xx,x);
780   VecGetArray_Fast(zz,z);
781 
782   idx   = a->j;
783   v     = a->a;
784   ii    = a->i;
785 
786   for ( i=0; i<mbs; i++ ) {
787     n  = ii[1] - ii[0]; ii++;
788     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
789     for ( j=0; j<n; j++ ) {
790       xb = x + 4*(*idx++);
791       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
792       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
793       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
794       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
795       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
796       v += 16;
797     }
798     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
799     z += 4;
800   }
801   VecRestoreArray_Fast(xx,x);
802   VecRestoreArray_Fast(zz,z);
803   PLogFlops(32*a->nz - a->m);
804   return 0;
805 }
806 
807 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
808 {
809   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
810   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
811   register Scalar x1,x2,x3,x4,x5;
812   int             mbs=a->mbs,i,*idx,*ii,j,n;
813 
814   VecGetArray_Fast(xx,x);
815   VecGetArray_Fast(zz,z);
816 
817   idx   = a->j;
818   v     = a->a;
819   ii    = a->i;
820 
821   for ( i=0; i<mbs; i++ ) {
822     n  = ii[1] - ii[0]; ii++;
823     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
824     for ( j=0; j<n; j++ ) {
825       xb = x + 5*(*idx++);
826       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
827       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
828       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
829       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
830       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
831       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
832       v += 25;
833     }
834     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
835     z += 5;
836   }
837   VecRestoreArray_Fast(xx,x);
838   VecRestoreArray_Fast(zz,z);
839   PLogFlops(50*a->nz - a->m);
840   return 0;
841 }
842 
843 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
844 {
845   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
846   register Scalar *x,*z,*v,*xb;
847   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
848   int             _One = 1,ncols,k;
849   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
850 
851   VecGetArray_Fast(xx,x);
852   VecGetArray_Fast(zz,z);
853 
854   idx   = a->j;
855   v     = a->a;
856   ii    = a->i;
857 
858 
859   if (!a->mult_work) {
860     k = PetscMax(a->m,a->n);
861     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
862   }
863   work = a->mult_work;
864   for ( i=0; i<mbs; i++ ) {
865     n     = ii[1] - ii[0]; ii++;
866     ncols = n*bs;
867     workt = work;
868     for ( j=0; j<n; j++ ) {
869       xb = x + bs*(*idx++);
870       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
871       workt += bs;
872     }
873     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
874     v += n*bs2;
875     z += bs;
876   }
877   VecRestoreArray_Fast(xx,x);
878   VecRestoreArray_Fast(zz,z);
879   PLogFlops(2*a->nz*bs2 - a->m);
880   return 0;
881 }
882 
883 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
884 {
885   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
886   register Scalar *x,*y,*z,*v,sum;
887   int             mbs=a->mbs,i,*idx,*ii,n;
888 
889   VecGetArray_Fast(xx,x);
890   VecGetArray_Fast(yy,y);
891   VecGetArray_Fast(zz,z);
892 
893   idx   = a->j;
894   v     = a->a;
895   ii    = a->i;
896 
897   for ( i=0; i<mbs; i++ ) {
898     n    = ii[1] - ii[0]; ii++;
899     sum  = y[i];
900     while (n--) sum += *v++ * x[*idx++];
901     z[i] = sum;
902   }
903   VecRestoreArray_Fast(xx,x);
904   VecRestoreArray_Fast(yy,y);
905   VecRestoreArray_Fast(zz,z);
906   PLogFlops(2*a->nz);
907   return 0;
908 }
909 
910 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
911 {
912   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
913   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
914   register Scalar x1,x2;
915   int             mbs=a->mbs,i,*idx,*ii;
916   int             j,n;
917 
918   VecGetArray_Fast(xx,x);
919   VecGetArray_Fast(yy,y);
920   VecGetArray_Fast(zz,z);
921 
922   idx   = a->j;
923   v     = a->a;
924   ii    = a->i;
925 
926   for ( i=0; i<mbs; i++ ) {
927     n  = ii[1] - ii[0]; ii++;
928     sum1 = y[0]; sum2 = y[1];
929     for ( j=0; j<n; j++ ) {
930       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
931       sum1 += v[0]*x1 + v[2]*x2;
932       sum2 += v[1]*x1 + v[3]*x2;
933       v += 4;
934     }
935     z[0] = sum1; z[1] = sum2;
936     z += 2; y += 2;
937   }
938   VecRestoreArray_Fast(xx,x);
939   VecRestoreArray_Fast(yy,y);
940   VecRestoreArray_Fast(zz,z);
941   PLogFlops(4*a->nz);
942   return 0;
943 }
944 
945 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
946 {
947   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
948   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
949   int             mbs=a->mbs,i,*idx,*ii,j,n;
950 
951   VecGetArray_Fast(xx,x);
952   VecGetArray_Fast(yy,y);
953   VecGetArray_Fast(zz,z);
954 
955   idx   = a->j;
956   v     = a->a;
957   ii    = a->i;
958 
959   for ( i=0; i<mbs; i++ ) {
960     n  = ii[1] - ii[0]; ii++;
961     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
962     for ( j=0; j<n; j++ ) {
963       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
964       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
965       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
966       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
967       v += 9;
968     }
969     z[0] = sum1; z[1] = sum2; z[2] = sum3;
970     z += 3; y += 3;
971   }
972   VecRestoreArray_Fast(xx,x);
973   VecRestoreArray_Fast(yy,y);
974   VecRestoreArray_Fast(zz,z);
975   PLogFlops(18*a->nz);
976   return 0;
977 }
978 
979 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
980 {
981   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
982   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
983   register Scalar x1,x2,x3,x4;
984   int             mbs=a->mbs,i,*idx,*ii;
985   int             j,n;
986 
987   VecGetArray_Fast(xx,x);
988   VecGetArray_Fast(yy,y);
989   VecGetArray_Fast(zz,z);
990 
991   idx   = a->j;
992   v     = a->a;
993   ii    = a->i;
994 
995   for ( i=0; i<mbs; i++ ) {
996     n  = ii[1] - ii[0]; ii++;
997     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
998     for ( j=0; j<n; j++ ) {
999       xb = x + 4*(*idx++);
1000       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1001       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1002       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1003       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1004       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1005       v += 16;
1006     }
1007     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1008     z += 4; y += 4;
1009   }
1010   VecRestoreArray_Fast(xx,x);
1011   VecRestoreArray_Fast(yy,y);
1012   VecRestoreArray_Fast(zz,z);
1013   PLogFlops(32*a->nz);
1014   return 0;
1015 }
1016 
1017 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1018 {
1019   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1020   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1021   register Scalar x1,x2,x3,x4,x5;
1022   int             mbs=a->mbs,i,*idx,*ii,j,n;
1023 
1024   VecGetArray_Fast(xx,x);
1025   VecGetArray_Fast(yy,y);
1026   VecGetArray_Fast(zz,z);
1027 
1028   idx   = a->j;
1029   v     = a->a;
1030   ii    = a->i;
1031 
1032   for ( i=0; i<mbs; i++ ) {
1033     n  = ii[1] - ii[0]; ii++;
1034     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1035     for ( j=0; j<n; j++ ) {
1036       xb = x + 5*(*idx++);
1037       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1038       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1039       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1040       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1041       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1042       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1043       v += 25;
1044     }
1045     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1046     z += 5; y += 5;
1047   }
1048   VecRestoreArray_Fast(xx,x);
1049   VecRestoreArray_Fast(yy,y);
1050   VecRestoreArray_Fast(zz,z);
1051   PLogFlops(50*a->nz);
1052   return 0;
1053 }
1054 
1055 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1056 {
1057   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1058   register Scalar *x,*z,*v,*xb;
1059   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1060   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1061 
1062   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1063 
1064   VecGetArray_Fast(xx,x);
1065   VecGetArray_Fast(zz,z);
1066 
1067   idx   = a->j;
1068   v     = a->a;
1069   ii    = a->i;
1070 
1071 
1072   if (!a->mult_work) {
1073     k = PetscMax(a->m,a->n);
1074     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1075   }
1076   work = a->mult_work;
1077   for ( i=0; i<mbs; i++ ) {
1078     n     = ii[1] - ii[0]; ii++;
1079     ncols = n*bs;
1080     workt = work;
1081     for ( j=0; j<n; j++ ) {
1082       xb = x + bs*(*idx++);
1083       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1084       workt += bs;
1085     }
1086     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1087     v += n*bs2;
1088     z += bs;
1089   }
1090   VecRestoreArray_Fast(xx,x);
1091   VecRestoreArray_Fast(zz,z);
1092   PLogFlops(2*a->nz*bs2 );
1093   return 0;
1094 }
1095 
1096 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1097 {
1098   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1099   Scalar          *xg,*zg,*zb;
1100   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1101   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1102   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1103 
1104 
1105   VecGetArray_Fast(xx,xg); x = xg;
1106   VecGetArray_Fast(zz,zg); z = zg;
1107   PetscMemzero(z,N*sizeof(Scalar));
1108 
1109   idx   = a->j;
1110   v     = a->a;
1111   ii    = a->i;
1112 
1113   switch (bs) {
1114   case 1:
1115     for ( i=0; i<mbs; i++ ) {
1116       n  = ii[1] - ii[0]; ii++;
1117       xb = x + i; x1 = xb[0];
1118       ib = idx + ai[i];
1119       for ( j=0; j<n; j++ ) {
1120         rval    = ib[j];
1121         z[rval] += *v++ * x1;
1122       }
1123     }
1124     break;
1125   case 2:
1126     for ( i=0; i<mbs; i++ ) {
1127       n  = ii[1] - ii[0]; ii++;
1128       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1129       ib = idx + ai[i];
1130       for ( j=0; j<n; j++ ) {
1131         rval      = ib[j]*2;
1132         z[rval++] += v[0]*x1 + v[1]*x2;
1133         z[rval++] += v[2]*x1 + v[3]*x2;
1134         v += 4;
1135       }
1136     }
1137     break;
1138   case 3:
1139     for ( i=0; i<mbs; i++ ) {
1140       n  = ii[1] - ii[0]; ii++;
1141       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1142       ib = idx + ai[i];
1143       for ( j=0; j<n; j++ ) {
1144         rval      = ib[j]*3;
1145         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1146         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1147         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1148         v += 9;
1149       }
1150     }
1151     break;
1152   case 4:
1153     for ( i=0; i<mbs; i++ ) {
1154       n  = ii[1] - ii[0]; ii++;
1155       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1156       ib = idx + ai[i];
1157       for ( j=0; j<n; j++ ) {
1158         rval      = ib[j]*4;
1159         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1160         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1161         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1162         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1163         v += 16;
1164       }
1165     }
1166     break;
1167   case 5:
1168     for ( i=0; i<mbs; i++ ) {
1169       n  = ii[1] - ii[0]; ii++;
1170       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1171       x4 = xb[3];   x5 = xb[4];
1172       ib = idx + ai[i];
1173       for ( j=0; j<n; j++ ) {
1174         rval      = ib[j]*5;
1175         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1176         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1177         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1178         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1179         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1180         v += 25;
1181       }
1182     }
1183     break;
1184       /* block sizes larger then 5 by 5 are handled by BLAS */
1185     default: {
1186       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1187       if (!a->mult_work) {
1188         k = PetscMax(a->m,a->n);
1189         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1190         CHKPTRQ(a->mult_work);
1191       }
1192       work = a->mult_work;
1193       for ( i=0; i<mbs; i++ ) {
1194         n     = ii[1] - ii[0]; ii++;
1195         ncols = n*bs;
1196         PetscMemzero(work,ncols*sizeof(Scalar));
1197         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1198         v += n*bs2;
1199         x += bs;
1200         workt = work;
1201         for ( j=0; j<n; j++ ) {
1202           zb = z + bs*(*idx++);
1203           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1204           workt += bs;
1205         }
1206       }
1207     }
1208   }
1209   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1210   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1211   PLogFlops(2*a->nz*a->bs2 - a->n);
1212   return 0;
1213 }
1214 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1215 {
1216   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1217   Scalar          *xg,*zg,*zb;
1218   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1219   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1220   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1221 
1222 
1223 
1224   VecGetArray_Fast(xx,xg); x = xg;
1225   VecGetArray_Fast(zz,zg); z = zg;
1226 
1227   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1228   else PetscMemzero(z,N*sizeof(Scalar));
1229 
1230 
1231   idx   = a->j;
1232   v     = a->a;
1233   ii    = a->i;
1234 
1235   switch (bs) {
1236   case 1:
1237     for ( i=0; i<mbs; i++ ) {
1238       n  = ii[1] - ii[0]; ii++;
1239       xb = x + i; x1 = xb[0];
1240       ib = idx + ai[i];
1241       for ( j=0; j<n; j++ ) {
1242         rval    = ib[j];
1243         z[rval] += *v++ * x1;
1244       }
1245     }
1246     break;
1247   case 2:
1248     for ( i=0; i<mbs; i++ ) {
1249       n  = ii[1] - ii[0]; ii++;
1250       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1251       ib = idx + ai[i];
1252       for ( j=0; j<n; j++ ) {
1253         rval      = ib[j]*2;
1254         z[rval++] += v[0]*x1 + v[1]*x2;
1255         z[rval++] += v[2]*x1 + v[3]*x2;
1256         v += 4;
1257       }
1258     }
1259     break;
1260   case 3:
1261     for ( i=0; i<mbs; i++ ) {
1262       n  = ii[1] - ii[0]; ii++;
1263       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1264       ib = idx + ai[i];
1265       for ( j=0; j<n; j++ ) {
1266         rval      = ib[j]*3;
1267         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1268         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1269         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1270         v += 9;
1271       }
1272     }
1273     break;
1274   case 4:
1275     for ( i=0; i<mbs; i++ ) {
1276       n  = ii[1] - ii[0]; ii++;
1277       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1278       ib = idx + ai[i];
1279       for ( j=0; j<n; j++ ) {
1280         rval      = ib[j]*4;
1281         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1282         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1283         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1284         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1285         v += 16;
1286       }
1287     }
1288     break;
1289   case 5:
1290     for ( i=0; i<mbs; i++ ) {
1291       n  = ii[1] - ii[0]; ii++;
1292       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1293       x4 = xb[3];   x5 = xb[4];
1294       ib = idx + ai[i];
1295       for ( j=0; j<n; j++ ) {
1296         rval      = ib[j]*5;
1297         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1298         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1299         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1300         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1301         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1302         v += 25;
1303       }
1304     }
1305     break;
1306       /* block sizes larger then 5 by 5 are handled by BLAS */
1307     default: {
1308       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1309       if (!a->mult_work) {
1310         k = PetscMax(a->m,a->n);
1311         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1312         CHKPTRQ(a->mult_work);
1313       }
1314       work = a->mult_work;
1315       for ( i=0; i<mbs; i++ ) {
1316         n     = ii[1] - ii[0]; ii++;
1317         ncols = n*bs;
1318         PetscMemzero(work,ncols*sizeof(Scalar));
1319         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1320         v += n*bs2;
1321         x += bs;
1322         workt = work;
1323         for ( j=0; j<n; j++ ) {
1324           zb = z + bs*(*idx++);
1325           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1326           workt += bs;
1327         }
1328       }
1329     }
1330   }
1331   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1332   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1333   PLogFlops(2*a->nz*a->bs2);
1334   return 0;
1335 }
1336 
1337 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1338 {
1339   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1340 
1341   info->rows_global    = (double)a->m;
1342   info->columns_global = (double)a->n;
1343   info->rows_local     = (double)a->m;
1344   info->columns_local  = (double)a->n;
1345   info->block_size     = a->bs2;
1346   info->nz_allocated   = a->maxnz;
1347   info->nz_used        = a->bs2*a->nz;
1348   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1349   /*  if (info->nz_unneeded != A->info.nz_unneeded)
1350     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
1351   info->assemblies   = A->num_ass;
1352   info->mallocs      = a->reallocs;
1353   info->memory       = A->mem;
1354   if (A->factor) {
1355     info->fill_ratio_given  = A->info.fill_ratio_given;
1356     info->fill_ratio_needed = A->info.fill_ratio_needed;
1357     info->factor_mallocs    = A->info.factor_mallocs;
1358   } else {
1359     info->fill_ratio_given  = 0;
1360     info->fill_ratio_needed = 0;
1361     info->factor_mallocs    = 0;
1362   }
1363   return 0;
1364 }
1365 
1366 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1367 {
1368   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1369 
1370   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
1371 
1372   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1373   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1374       (a->nz != b->nz)) {
1375     *flg = PETSC_FALSE; return 0;
1376   }
1377 
1378   /* if the a->i are the same */
1379   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1380     *flg = PETSC_FALSE; return 0;
1381   }
1382 
1383   /* if a->j are the same */
1384   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1385     *flg = PETSC_FALSE; return 0;
1386   }
1387 
1388   /* if a->a are the same */
1389   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1390     *flg = PETSC_FALSE; return 0;
1391   }
1392   *flg = PETSC_TRUE;
1393   return 0;
1394 
1395 }
1396 
1397 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1398 {
1399   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1400   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1401   Scalar      *x, zero = 0.0,*aa,*aa_j;
1402 
1403   bs   = a->bs;
1404   aa   = a->a;
1405   ai   = a->i;
1406   aj   = a->j;
1407   ambs = a->mbs;
1408   bs2  = a->bs2;
1409 
1410   VecSet(&zero,v);
1411   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1412   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
1413   for ( i=0; i<ambs; i++ ) {
1414     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1415       if (aj[j] == i) {
1416         row  = i*bs;
1417         aa_j = aa+j*bs2;
1418         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1419         break;
1420       }
1421     }
1422   }
1423   return 0;
1424 }
1425 
1426 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1427 {
1428   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1429   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1430   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1431 
1432   ai  = a->i;
1433   aj  = a->j;
1434   aa  = a->a;
1435   m   = a->m;
1436   n   = a->n;
1437   bs  = a->bs;
1438   mbs = a->mbs;
1439   bs2 = a->bs2;
1440   if (ll) {
1441     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1442     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
1443     for ( i=0; i<mbs; i++ ) { /* for each block row */
1444       M  = ai[i+1] - ai[i];
1445       li = l + i*bs;
1446       v  = aa + bs2*ai[i];
1447       for ( j=0; j<M; j++ ) { /* for each block */
1448         for ( k=0; k<bs2; k++ ) {
1449           (*v++) *= li[k%bs];
1450         }
1451       }
1452     }
1453   }
1454 
1455   if (rr) {
1456     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1457     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
1458     for ( i=0; i<mbs; i++ ) { /* for each block row */
1459       M  = ai[i+1] - ai[i];
1460       v  = aa + bs2*ai[i];
1461       for ( j=0; j<M; j++ ) { /* for each block */
1462         ri = r + bs*aj[ai[i]+j];
1463         for ( k=0; k<bs; k++ ) {
1464           x = ri[k];
1465           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1466         }
1467       }
1468     }
1469   }
1470   return 0;
1471 }
1472 
1473 
1474 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1475 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1476 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1477 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1478 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1479 
1480 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1481 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1482 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1483 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1484 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1485 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1486 
1487 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1488 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1489 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1490 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1491 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1492 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1493 
1494 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1495 {
1496   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1497   Scalar      *v = a->a;
1498   double      sum = 0.0;
1499   int         i,nz=a->nz,bs2=a->bs2;
1500 
1501   if (type == NORM_FROBENIUS) {
1502     for (i=0; i< bs2*nz; i++ ) {
1503 #if defined(PETSC_COMPLEX)
1504       sum += real(conj(*v)*(*v)); v++;
1505 #else
1506       sum += (*v)*(*v); v++;
1507 #endif
1508     }
1509     *norm = sqrt(sum);
1510   }
1511   else {
1512     SETERRQ(PETSC_ERR_SUP,"MatNorm_SeqBAIJ:No support for this norm yet");
1513   }
1514   return 0;
1515 }
1516 
1517 /*
1518      note: This can only work for identity for row and col. It would
1519    be good to check this and otherwise generate an error.
1520 */
1521 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1522 {
1523   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1524   Mat         outA;
1525   int         ierr;
1526 
1527   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
1528 
1529   outA          = inA;
1530   inA->factor   = FACTOR_LU;
1531   a->row        = row;
1532   a->col        = col;
1533 
1534   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1535 
1536   if (!a->diag) {
1537     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1538   }
1539   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1540   return 0;
1541 }
1542 
1543 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1544 {
1545   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1546   int         one = 1, totalnz = a->bs2*a->nz;
1547   BLscal_( &totalnz, alpha, a->a, &one );
1548   PLogFlops(totalnz);
1549   return 0;
1550 }
1551 
1552 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1553 {
1554   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1555   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1556   int        *ai = a->i, *ailen = a->ilen;
1557   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1558   Scalar     *ap, *aa = a->a, zero = 0.0;
1559 
1560   for ( k=0; k<m; k++ ) { /* loop over rows */
1561     row  = im[k]; brow = row/bs;
1562     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
1563     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1564     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1565     nrow = ailen[brow];
1566     for ( l=0; l<n; l++ ) { /* loop over columns */
1567       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1568       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1569       col  = in[l] ;
1570       bcol = col/bs;
1571       cidx = col%bs;
1572       ridx = row%bs;
1573       high = nrow;
1574       low  = 0; /* assume unsorted */
1575       while (high-low > 5) {
1576         t = (low+high)/2;
1577         if (rp[t] > bcol) high = t;
1578         else             low  = t;
1579       }
1580       for ( i=low; i<high; i++ ) {
1581         if (rp[i] > bcol) break;
1582         if (rp[i] == bcol) {
1583           *v++ = ap[bs2*i+bs*cidx+ridx];
1584           goto finished;
1585         }
1586       }
1587       *v++ = zero;
1588       finished:;
1589     }
1590   }
1591   return 0;
1592 }
1593 
1594 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1595 {
1596   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1597   *bs = baij->bs;
1598   return 0;
1599 }
1600 
1601 /* idx should be of length atlease bs */
1602 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1603 {
1604   int i,row;
1605   row = idx[0];
1606   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1607 
1608   for ( i=1; i<bs; i++ ) {
1609     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1610   }
1611   *flg = PETSC_TRUE;
1612   return 0;
1613 }
1614 
1615 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1616 {
1617   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1618   IS          is_local;
1619   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1620   PetscTruth  flg;
1621   Scalar      zero = 0.0,*aa;
1622 
1623   /* Make a copy of the IS and  sort it */
1624   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1625   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1626   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1627   ierr = ISSort(is_local); CHKERRQ(ierr);
1628   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1629 
1630   i = 0;
1631   while (i < is_n) {
1632     if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range");
1633     flg = PETSC_FALSE;
1634     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1635     if (flg) { /* There exists a block of rows to be Zerowed */
1636       baij->ilen[rows[i]/bs] = 0;
1637       i += bs;
1638     } else { /* Zero out only the requested row */
1639       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1640       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1641       for ( j=0; j<count; j++ ) {
1642         aa[0] = zero;
1643         aa+=bs;
1644       }
1645       i++;
1646     }
1647   }
1648   if (diag) {
1649     for ( j=0; j<is_n; j++ ) {
1650       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1651     }
1652   }
1653   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1654   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1655   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1656   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1657 
1658   return 0;
1659 }
1660 int MatPrintHelp_SeqBAIJ(Mat A)
1661 {
1662   static int called = 0;
1663   MPI_Comm   comm = A->comm;
1664 
1665   if (called) return 0; else called = 1;
1666   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1667   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1668   return 0;
1669 }
1670 
1671 /* -------------------------------------------------------------------*/
1672 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1673        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1674        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1675        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1676        MatSolve_SeqBAIJ_N,0,
1677        0,0,
1678        MatLUFactor_SeqBAIJ,0,
1679        0,
1680        MatTranspose_SeqBAIJ,
1681        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1682        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1683        0,MatAssemblyEnd_SeqBAIJ,
1684        0,
1685        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
1686        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1687        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1688        MatILUFactorSymbolic_SeqBAIJ,0,
1689        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1690        MatConvertSameType_SeqBAIJ,0,0,
1691        MatILUFactor_SeqBAIJ,0,0,
1692        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
1693        MatGetValues_SeqBAIJ,0,
1694        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1695        0,0,0,MatGetBlockSize_SeqBAIJ,
1696        MatGetRowIJ_SeqBAIJ,
1697        MatRestoreRowIJ_SeqBAIJ};
1698 
1699 /*@C
1700    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1701    compressed row) format.  For good matrix assembly performance the
1702    user should preallocate the matrix storage by setting the parameter nz
1703    (or the array nzz).  By setting these parameters accurately, performance
1704    during matrix assembly can be increased by more than a factor of 50.
1705 
1706    Input Parameters:
1707 .  comm - MPI communicator, set to MPI_COMM_SELF
1708 .  bs - size of block
1709 .  m - number of rows
1710 .  n - number of columns
1711 .  nz - number of block nonzeros per block row (same for all rows)
1712 .  nzz - array containing the number of block nonzeros in the various block rows
1713          (possibly different for each block row) or PETSC_NULL
1714 
1715    Output Parameter:
1716 .  A - the matrix
1717 
1718    Notes:
1719    The block AIJ format is fully compatible with standard Fortran 77
1720    storage.  That is, the stored row and column indices can begin at
1721    either one (as in Fortran) or zero.  See the users' manual for details.
1722 
1723    Specify the preallocated storage with either nz or nnz (not both).
1724    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1725    allocation.  For additional details, see the users manual chapter on
1726    matrices and the file $(PETSC_DIR)/Performance.
1727 
1728 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1729 @*/
1730 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1731 {
1732   Mat         B;
1733   Mat_SeqBAIJ *b;
1734   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
1735 
1736   MPI_Comm_size(comm,&size);
1737   if (size > 1) SETERRQ(1,"MatCreateSeqBAIJ:Comm must be of size 1");
1738 
1739   if (mbs*bs!=m || nbs*bs!=n)
1740     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1741 
1742   *A = 0;
1743   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1744   PLogObjectCreate(B);
1745   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1746   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1747   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1748   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1749   if (!flg) {
1750     switch (bs) {
1751       case 1:
1752         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1753         B->ops.solve           = MatSolve_SeqBAIJ_1;
1754         B->ops.mult            = MatMult_SeqBAIJ_1;
1755         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
1756        break;
1757       case 2:
1758         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1759         B->ops.solve           = MatSolve_SeqBAIJ_2;
1760         B->ops.mult            = MatMult_SeqBAIJ_2;
1761         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
1762         break;
1763       case 3:
1764         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1765         B->ops.solve           = MatSolve_SeqBAIJ_3;
1766         B->ops.mult            = MatMult_SeqBAIJ_3;
1767         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
1768         break;
1769       case 4:
1770         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1771         B->ops.solve           = MatSolve_SeqBAIJ_4;
1772         B->ops.mult            = MatMult_SeqBAIJ_4;
1773         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
1774         break;
1775       case 5:
1776         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1777         B->ops.solve           = MatSolve_SeqBAIJ_5;
1778         B->ops.mult            = MatMult_SeqBAIJ_5;
1779         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
1780         break;
1781     }
1782   }
1783   B->destroy          = MatDestroy_SeqBAIJ;
1784   B->view             = MatView_SeqBAIJ;
1785   B->factor           = 0;
1786   B->lupivotthreshold = 1.0;
1787   B->mapping          = 0;
1788   b->row              = 0;
1789   b->col              = 0;
1790   b->reallocs         = 0;
1791 
1792   b->m       = m; B->m = m; B->M = m;
1793   b->n       = n; B->n = n; B->N = n;
1794   b->mbs     = mbs;
1795   b->nbs     = nbs;
1796   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1797   if (nnz == PETSC_NULL) {
1798     if (nz == PETSC_DEFAULT) nz = 5;
1799     else if (nz <= 0)        nz = 1;
1800     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1801     nz = nz*mbs;
1802   }
1803   else {
1804     nz = 0;
1805     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1806   }
1807 
1808   /* allocate the matrix space */
1809   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1810   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1811   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1812   b->j  = (int *) (b->a + nz*bs2);
1813   PetscMemzero(b->j,nz*sizeof(int));
1814   b->i  = b->j + nz;
1815   b->singlemalloc = 1;
1816 
1817   b->i[0] = 0;
1818   for (i=1; i<mbs+1; i++) {
1819     b->i[i] = b->i[i-1] + b->imax[i-1];
1820   }
1821 
1822   /* b->ilen will count nonzeros in each block row so far. */
1823   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1824   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1825   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1826 
1827   b->bs               = bs;
1828   b->bs2              = bs2;
1829   b->mbs              = mbs;
1830   b->nz               = 0;
1831   b->maxnz            = nz*bs2;
1832   b->sorted           = 0;
1833   b->roworiented      = 1;
1834   b->nonew            = 0;
1835   b->diag             = 0;
1836   b->solve_work       = 0;
1837   b->mult_work        = 0;
1838   b->spptr            = 0;
1839   B->info.nz_unneeded = (double)b->maxnz;
1840 
1841   *A = B;
1842   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1843   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1844   return 0;
1845 }
1846 
1847 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1848 {
1849   Mat         C;
1850   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1851   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1852 
1853   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1854 
1855   *B = 0;
1856   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1857   PLogObjectCreate(C);
1858   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1859   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1860   C->destroy    = MatDestroy_SeqBAIJ;
1861   C->view       = MatView_SeqBAIJ;
1862   C->factor     = A->factor;
1863   c->row        = 0;
1864   c->col        = 0;
1865   C->assembled  = PETSC_TRUE;
1866 
1867   c->m = C->m   = a->m;
1868   c->n = C->n   = a->n;
1869   C->M          = a->m;
1870   C->N          = a->n;
1871 
1872   c->bs         = a->bs;
1873   c->bs2        = a->bs2;
1874   c->mbs        = a->mbs;
1875   c->nbs        = a->nbs;
1876 
1877   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1878   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1879   for ( i=0; i<mbs; i++ ) {
1880     c->imax[i] = a->imax[i];
1881     c->ilen[i] = a->ilen[i];
1882   }
1883 
1884   /* allocate the matrix space */
1885   c->singlemalloc = 1;
1886   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1887   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1888   c->j  = (int *) (c->a + nz*bs2);
1889   c->i  = c->j + nz;
1890   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1891   if (mbs > 0) {
1892     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1893     if (cpvalues == COPY_VALUES) {
1894       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1895     }
1896   }
1897 
1898   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1899   c->sorted      = a->sorted;
1900   c->roworiented = a->roworiented;
1901   c->nonew       = a->nonew;
1902 
1903   if (a->diag) {
1904     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1905     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1906     for ( i=0; i<mbs; i++ ) {
1907       c->diag[i] = a->diag[i];
1908     }
1909   }
1910   else c->diag          = 0;
1911   c->nz                 = a->nz;
1912   c->maxnz              = a->maxnz;
1913   c->solve_work         = 0;
1914   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1915   c->mult_work          = 0;
1916   *B = C;
1917   return 0;
1918 }
1919 
1920 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1921 {
1922   Mat_SeqBAIJ  *a;
1923   Mat          B;
1924   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1925   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1926   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1927   int          *masked, nmask,tmp,bs2,ishift;
1928   Scalar       *aa;
1929   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1930 
1931   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1932   bs2  = bs*bs;
1933 
1934   MPI_Comm_size(comm,&size);
1935   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1936   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1937   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1938   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1939   M = header[1]; N = header[2]; nz = header[3];
1940 
1941   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1942 
1943   /*
1944      This code adds extra rows to make sure the number of rows is
1945     divisible by the blocksize
1946   */
1947   mbs        = M/bs;
1948   extra_rows = bs - M + bs*(mbs);
1949   if (extra_rows == bs) extra_rows = 0;
1950   else                  mbs++;
1951   if (extra_rows) {
1952     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1953   }
1954 
1955   /* read in row lengths */
1956   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1957   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1958   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1959 
1960   /* read in column indices */
1961   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1962   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1963   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1964 
1965   /* loop over row lengths determining block row lengths */
1966   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1967   PetscMemzero(browlengths,mbs*sizeof(int));
1968   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1969   PetscMemzero(mask,mbs*sizeof(int));
1970   masked = mask + mbs;
1971   rowcount = 0; nzcount = 0;
1972   for ( i=0; i<mbs; i++ ) {
1973     nmask = 0;
1974     for ( j=0; j<bs; j++ ) {
1975       kmax = rowlengths[rowcount];
1976       for ( k=0; k<kmax; k++ ) {
1977         tmp = jj[nzcount++]/bs;
1978         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1979       }
1980       rowcount++;
1981     }
1982     browlengths[i] += nmask;
1983     /* zero out the mask elements we set */
1984     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1985   }
1986 
1987   /* create our matrix */
1988   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1989          CHKERRQ(ierr);
1990   B = *A;
1991   a = (Mat_SeqBAIJ *) B->data;
1992 
1993   /* set matrix "i" values */
1994   a->i[0] = 0;
1995   for ( i=1; i<= mbs; i++ ) {
1996     a->i[i]      = a->i[i-1] + browlengths[i-1];
1997     a->ilen[i-1] = browlengths[i-1];
1998   }
1999   a->nz         = 0;
2000   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
2001 
2002   /* read in nonzero values */
2003   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
2004   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
2005   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2006 
2007   /* set "a" and "j" values into matrix */
2008   nzcount = 0; jcount = 0;
2009   for ( i=0; i<mbs; i++ ) {
2010     nzcountb = nzcount;
2011     nmask    = 0;
2012     for ( j=0; j<bs; j++ ) {
2013       kmax = rowlengths[i*bs+j];
2014       for ( k=0; k<kmax; k++ ) {
2015         tmp = jj[nzcount++]/bs;
2016 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2017       }
2018       rowcount++;
2019     }
2020     /* sort the masked values */
2021     PetscSortInt(nmask,masked);
2022 
2023     /* set "j" values into matrix */
2024     maskcount = 1;
2025     for ( j=0; j<nmask; j++ ) {
2026       a->j[jcount++]  = masked[j];
2027       mask[masked[j]] = maskcount++;
2028     }
2029     /* set "a" values into matrix */
2030     ishift = bs2*a->i[i];
2031     for ( j=0; j<bs; j++ ) {
2032       kmax = rowlengths[i*bs+j];
2033       for ( k=0; k<kmax; k++ ) {
2034         tmp    = jj[nzcountb]/bs ;
2035         block  = mask[tmp] - 1;
2036         point  = jj[nzcountb] - bs*tmp;
2037         idx    = ishift + bs2*block + j + bs*point;
2038         a->a[idx] = aa[nzcountb++];
2039       }
2040     }
2041     /* zero out the mask elements we set */
2042     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2043   }
2044   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
2045 
2046   PetscFree(rowlengths);
2047   PetscFree(browlengths);
2048   PetscFree(aa);
2049   PetscFree(jj);
2050   PetscFree(mask);
2051 
2052   B->assembled = PETSC_TRUE;
2053 
2054   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2055   if (flg) {
2056     Viewer tviewer;
2057     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2058     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
2059     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2060     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2061   }
2062   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2063   if (flg) {
2064     Viewer tviewer;
2065     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2066     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
2067     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2068     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2069   }
2070   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2071   if (flg) {
2072     Viewer tviewer;
2073     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2074     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2075     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2076   }
2077   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2078   if (flg) {
2079     Viewer tviewer;
2080     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2081     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
2082     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2083     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2084   }
2085   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2086   if (flg) {
2087     Viewer tviewer;
2088     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
2089     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
2090     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
2091     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2092   }
2093   return 0;
2094 }
2095 
2096 
2097 
2098