xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 4a2da8023ded797213582fbae6c8340879a554cb)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.61 1996/07/31 20:25:01 balay Exp balay $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "src/vec/vecimpl.h"
12 #include "src/inline/spops.h"
13 #include "petsc.h"
14 
15 extern   int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatReordering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = 0;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr);
45     ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
46     PetscFree(ia); PetscFree(ja);
47   } else {
48     if (ishift == oshift) {
49       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
50     }
51     else if (ishift == -1) {
52       /* temporarily subtract 1 from i and j indices */
53       int nz = a->i[n] - 1;
54       for ( i=0; i<nz; i++ ) a->j[i]--;
55       for ( i=0; i<n+1; i++ ) a->i[i]--;
56       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
57       for ( i=0; i<nz; i++ ) a->j[i]++;
58       for ( i=0; i<n+1; i++ ) a->i[i]++;
59     } else {
60       /* temporarily add 1 to i and j indices */
61       int nz = a->i[n] - 1;
62       for ( i=0; i<nz; i++ ) a->j[i]++;
63       for ( i=0; i<n+1; i++ ) a->i[i]++;
64       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
65       for ( i=0; i<nz; i++ ) a->j[i]--;
66       for ( i=0; i<n+1; i++ ) a->i[i]--;
67     }
68   }
69   return 0;
70 }
71 
72 /*
73      Adds diagonal pointers to sparse matrix structure.
74 */
75 
76 int MatMarkDiag_SeqBAIJ(Mat A)
77 {
78   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
79   int         i,j, *diag, m = a->mbs;
80 
81   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
82   PLogObjectMemory(A,(m+1)*sizeof(int));
83   for ( i=0; i<m; i++ ) {
84     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
85       if (a->j[j] == i) {
86         diag[i] = j;
87         break;
88       }
89     }
90   }
91   a->diag = diag;
92   return 0;
93 }
94 
95 #include "draw.h"
96 #include "pinclude/pviewer.h"
97 #include "sys.h"
98 
99 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
100 {
101   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
102   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
103   Scalar      *aa;
104 
105   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
106   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
107   col_lens[0] = MAT_COOKIE;
108   col_lens[1] = a->m;
109   col_lens[2] = a->n;
110   col_lens[3] = a->nz*bs2;
111 
112   /* store lengths of each row and write (including header) to file */
113   count = 0;
114   for ( i=0; i<a->mbs; i++ ) {
115     for ( j=0; j<bs; j++ ) {
116       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
117     }
118   }
119   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
120   PetscFree(col_lens);
121 
122   /* store column indices (zero start index) */
123   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
124   count = 0;
125   for ( i=0; i<a->mbs; i++ ) {
126     for ( j=0; j<bs; j++ ) {
127       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
128         for ( l=0; l<bs; l++ ) {
129           jj[count++] = bs*a->j[k] + l;
130         }
131       }
132     }
133   }
134   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
135   PetscFree(jj);
136 
137   /* store nonzero values */
138   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
139   count = 0;
140   for ( i=0; i<a->mbs; i++ ) {
141     for ( j=0; j<bs; j++ ) {
142       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
143         for ( l=0; l<bs; l++ ) {
144           aa[count++] = a->a[bs2*k + l*bs + j];
145         }
146       }
147     }
148   }
149   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
150   PetscFree(aa);
151   return 0;
152 }
153 
154 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
155 {
156   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
157   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
158   FILE        *fd;
159   char        *outputname;
160 
161   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
162   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
163   ierr = ViewerGetFormat(viewer,&format);
164   if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) {
165     fprintf(fd,"  block size is %d\n",bs);
166   }
167   else if (format == ASCII_FORMAT_MATLAB) {
168     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
169   }
170   else if (format == ASCII_FORMAT_COMMON) {
171     for ( i=0; i<a->mbs; i++ ) {
172       for ( j=0; j<bs; j++ ) {
173         fprintf(fd,"row %d:",i*bs+j);
174         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
175           for ( l=0; l<bs; l++ ) {
176 #if defined(PETSC_COMPLEX)
177           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
178             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
179               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
180           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
181             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
182 #else
183           if (a->a[bs2*k + l*bs + j] != 0.0)
184             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
185 #endif
186           }
187         }
188         fprintf(fd,"\n");
189       }
190     }
191   }
192   else {
193     for ( i=0; i<a->mbs; i++ ) {
194       for ( j=0; j<bs; j++ ) {
195         fprintf(fd,"row %d:",i*bs+j);
196         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
197           for ( l=0; l<bs; l++ ) {
198 #if defined(PETSC_COMPLEX)
199           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
200             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
201               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
202           }
203           else {
204             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
205           }
206 #else
207             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
208 #endif
209           }
210         }
211         fprintf(fd,"\n");
212       }
213     }
214   }
215   fflush(fd);
216   return 0;
217 }
218 
219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
220 {
221   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
222   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
223   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
224   Scalar       *aa;
225   Draw         draw;
226   DrawButton   button;
227   PetscTruth   isnull;
228 
229   ViewerDrawGetDraw(viewer,&draw);
230   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
231 
232   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
233   xr += w;    yr += h;  xl = -w;     yl = -h;
234   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
235   /* loop over matrix elements drawing boxes */
236   color = DRAW_BLUE;
237   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
238     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
239       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
240       x_l = a->j[j]*bs; x_r = x_l + 1.0;
241       aa = a->a + j*bs2;
242       for ( k=0; k<bs; k++ ) {
243         for ( l=0; l<bs; l++ ) {
244           if (PetscReal(*aa++) >=  0.) continue;
245           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
246         }
247       }
248     }
249   }
250   color = DRAW_CYAN;
251   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
252     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
253       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
254       x_l = a->j[j]*bs; x_r = x_l + 1.0;
255       aa = a->a + j*bs2;
256       for ( k=0; k<bs; k++ ) {
257         for ( l=0; l<bs; l++ ) {
258           if (PetscReal(*aa++) != 0.) continue;
259           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
260         }
261       }
262     }
263   }
264 
265   color = DRAW_RED;
266   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
267     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
268       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
269       x_l = a->j[j]*bs; x_r = x_l + 1.0;
270       aa = a->a + j*bs2;
271       for ( k=0; k<bs; k++ ) {
272         for ( l=0; l<bs; l++ ) {
273           if (PetscReal(*aa++) <= 0.) continue;
274           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
275         }
276       }
277     }
278   }
279 
280   DrawFlush(draw);
281   DrawGetPause(draw,&pause);
282   if (pause >= 0) { PetscSleep(pause); return 0;}
283 
284   /* allow the matrix to zoom or shrink */
285   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
286   while (button != BUTTON_RIGHT) {
287     DrawClear(draw);
288     if (button == BUTTON_LEFT) scale = .5;
289     else if (button == BUTTON_CENTER) scale = 2.;
290     xl = scale*(xl + w - xc) + xc - w*scale;
291     xr = scale*(xr - w - xc) + xc + w*scale;
292     yl = scale*(yl + h - yc) + yc - h*scale;
293     yr = scale*(yr - h - yc) + yc + h*scale;
294     w *= scale; h *= scale;
295     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
296     color = DRAW_BLUE;
297     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
298       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
299         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
300         x_l = a->j[j]*bs; x_r = x_l + 1.0;
301         aa = a->a + j*bs2;
302         for ( k=0; k<bs; k++ ) {
303           for ( l=0; l<bs; l++ ) {
304             if (PetscReal(*aa++) >=  0.) continue;
305             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
306           }
307         }
308       }
309     }
310     color = DRAW_CYAN;
311     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
312       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
313         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
314         x_l = a->j[j]*bs; x_r = x_l + 1.0;
315         aa = a->a + j*bs2;
316         for ( k=0; k<bs; k++ ) {
317           for ( l=0; l<bs; l++ ) {
318           if (PetscReal(*aa++) != 0.) continue;
319           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
320           }
321         }
322       }
323     }
324 
325     color = DRAW_RED;
326     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
327       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
328         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
329         x_l = a->j[j]*bs; x_r = x_l + 1.0;
330         aa = a->a + j*bs2;
331         for ( k=0; k<bs; k++ ) {
332           for ( l=0; l<bs; l++ ) {
333             if (PetscReal(*aa++) <= 0.) continue;
334             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
335           }
336         }
337       }
338     }
339     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
340   }
341   return 0;
342 }
343 
344 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
345 {
346   Mat         A = (Mat) obj;
347   ViewerType  vtype;
348   int         ierr;
349 
350   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
351   if (vtype == MATLAB_VIEWER) {
352     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
353   }
354   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
355     return MatView_SeqBAIJ_ASCII(A,viewer);
356   }
357   else if (vtype == BINARY_FILE_VIEWER) {
358     return MatView_SeqBAIJ_Binary(A,viewer);
359   }
360   else if (vtype == DRAW_VIEWER) {
361     return MatView_SeqBAIJ_Draw(A,viewer);
362   }
363   return 0;
364 }
365 
366 #define CHUNKSIZE  10
367 
368 /* This version has row oriented v  */
369 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
370 {
371   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
372   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
373   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
374   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
375   int          ridx,cidx,bs2=a->bs2;
376   Scalar      *ap,value,*aa=a->a,*bap;
377 
378   for ( k=0; k<m; k++ ) { /* loop over added rows */
379     row  = im[k]; brow = row/bs;
380     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
381     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
382     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
383     rmax = imax[brow]; nrow = ailen[brow];
384     low = 0;
385     for ( l=0; l<n; l++ ) { /* loop over added columns */
386       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
387       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
388       col = in[l]; bcol = col/bs;
389       ridx = row % bs; cidx = col % bs;
390       if (roworiented) {
391         value = *v++;
392       }
393       else {
394         value = v[k + l*m];
395       }
396       if (!sorted) low = 0; high = nrow;
397       while (high-low > 5) {
398         t = (low+high)/2;
399         if (rp[t] > bcol) high = t;
400         else             low  = t;
401       }
402       for ( i=low; i<high; i++ ) {
403         if (rp[i] > bcol) break;
404         if (rp[i] == bcol) {
405           bap  = ap +  bs2*i + bs*cidx + ridx;
406           if (is == ADD_VALUES) *bap += value;
407           else                  *bap  = value;
408           goto noinsert;
409         }
410       }
411       if (nonew) goto noinsert;
412       if (nrow >= rmax) {
413         /* there is no extra room in row, therefore enlarge */
414         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
415         Scalar *new_a;
416 
417         /* malloc new storage space */
418         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
419         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
420         new_j   = (int *) (new_a + bs2*new_nz);
421         new_i   = new_j + new_nz;
422 
423         /* copy over old data into new slots */
424         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
425         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
426         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
427         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
428         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
429                                                            len*sizeof(int));
430         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
431         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
432         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
433                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
434         /* free up old matrix storage */
435         PetscFree(a->a);
436         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
437         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
438         a->singlemalloc = 1;
439 
440         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
441         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
442         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
443         a->maxnz += bs2*CHUNKSIZE;
444         a->reallocs++;
445         a->nz++;
446       }
447       N = nrow++ - 1;
448       /* shift up all the later entries in this row */
449       for ( ii=N; ii>=i; ii-- ) {
450         rp[ii+1] = rp[ii];
451          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
452       }
453       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
454       rp[i] = bcol;
455       ap[bs2*i + bs*cidx + ridx] = value;
456       noinsert:;
457       low = i;
458     }
459     ailen[brow] = nrow;
460   }
461   return 0;
462 }
463 
464 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
465 {
466   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
467   *m = a->m; *n = a->n;
468   return 0;
469 }
470 
471 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
472 {
473   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
474   *m = 0; *n = a->m;
475   return 0;
476 }
477 
478 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
479 {
480   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
481   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
482   Scalar      *aa,*v_i,*aa_i;
483 
484   bs  = a->bs;
485   ai  = a->i;
486   aj  = a->j;
487   aa  = a->a;
488   bs2 = a->bs2;
489 
490   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
491 
492   bn  = row/bs;   /* Block number */
493   bp  = row % bs; /* Block Position */
494   M   = ai[bn+1] - ai[bn];
495   *nz = bs*M;
496 
497   if (v) {
498     *v = 0;
499     if (*nz) {
500       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
501       for ( i=0; i<M; i++ ) { /* for each block in the block row */
502         v_i  = *v + i*bs;
503         aa_i = aa + bs2*(ai[bn] + i);
504         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
505       }
506     }
507   }
508 
509   if (idx) {
510     *idx = 0;
511     if (*nz) {
512       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
513       for ( i=0; i<M; i++ ) { /* for each block in the block row */
514         idx_i = *idx + i*bs;
515         itmp  = bs*aj[ai[bn] + i];
516         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
517       }
518     }
519   }
520   return 0;
521 }
522 
523 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
524 {
525   if (idx) {if (*idx) PetscFree(*idx);}
526   if (v)   {if (*v)   PetscFree(*v);}
527   return 0;
528 }
529 
530 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
531 {
532   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
533   Mat         C;
534   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
535   int         *rows,*cols,bs2=a->bs2;
536   Scalar      *array=a->a;
537 
538   if (B==PETSC_NULL && mbs!=nbs)
539     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
540   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
541   PetscMemzero(col,(1+nbs)*sizeof(int));
542 
543   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
544   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
545   PetscFree(col);
546   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
547   cols = rows + bs;
548   for ( i=0; i<mbs; i++ ) {
549     cols[0] = i*bs;
550     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
551     len = ai[i+1] - ai[i];
552     for ( j=0; j<len; j++ ) {
553       rows[0] = (*aj++)*bs;
554       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
555       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
556       array += bs2;
557     }
558   }
559   PetscFree(rows);
560 
561   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
562   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
563 
564   if (B != PETSC_NULL) {
565     *B = C;
566   } else {
567     /* This isn't really an in-place transpose */
568     PetscFree(a->a);
569     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
570     if (a->diag) PetscFree(a->diag);
571     if (a->ilen) PetscFree(a->ilen);
572     if (a->imax) PetscFree(a->imax);
573     if (a->solve_work) PetscFree(a->solve_work);
574     PetscFree(a);
575     PetscMemcpy(A,C,sizeof(struct _Mat));
576     PetscHeaderDestroy(C);
577   }
578   return 0;
579 }
580 
581 
582 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
583 {
584   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
585   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
586   int        m = a->m,*ip, N, *ailen = a->ilen;
587   int        mbs = a->mbs, bs2 = a->bs2;
588   Scalar     *aa = a->a, *ap;
589 
590   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
591 
592   for ( i=1; i<mbs; i++ ) {
593     /* move each row back by the amount of empty slots (fshift) before it*/
594     fshift += imax[i-1] - ailen[i-1];
595     if (fshift) {
596       ip = aj + ai[i]; ap = aa + bs2*ai[i];
597       N = ailen[i];
598       for ( j=0; j<N; j++ ) {
599         ip[j-fshift] = ip[j];
600         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
601       }
602     }
603     ai[i] = ai[i-1] + ailen[i-1];
604   }
605   if (mbs) {
606     fshift += imax[mbs-1] - ailen[mbs-1];
607     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
608   }
609   /* reset ilen and imax for each row */
610   for ( i=0; i<mbs; i++ ) {
611     ailen[i] = imax[i] = ai[i+1] - ai[i];
612   }
613   a->nz = ai[mbs];
614 
615   /* diagonals may have moved, so kill the diagonal pointers */
616   if (fshift && a->diag) {
617     PetscFree(a->diag);
618     PLogObjectMemory(A,-(m+1)*sizeof(int));
619     a->diag = 0;
620   }
621   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space (blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs);
622   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n",
623            a->reallocs);
624   return 0;
625 }
626 
627 static int MatZeroEntries_SeqBAIJ(Mat A)
628 {
629   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
630   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
631   return 0;
632 }
633 
634 int MatDestroy_SeqBAIJ(PetscObject obj)
635 {
636   Mat         A  = (Mat) obj;
637   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
638 
639 #if defined(PETSC_LOG)
640   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
641 #endif
642   PetscFree(a->a);
643   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
644   if (a->diag) PetscFree(a->diag);
645   if (a->ilen) PetscFree(a->ilen);
646   if (a->imax) PetscFree(a->imax);
647   if (a->solve_work) PetscFree(a->solve_work);
648   if (a->mult_work) PetscFree(a->mult_work);
649   PetscFree(a);
650   PLogObjectDestroy(A);
651   PetscHeaderDestroy(A);
652   return 0;
653 }
654 
655 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
656 {
657   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
658   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
659   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
660   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
661   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
662   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
663   else if (op == MAT_ROWS_SORTED ||
664            op == MAT_SYMMETRIC ||
665            op == MAT_STRUCTURALLY_SYMMETRIC ||
666            op == MAT_YES_NEW_DIAGONALS)
667     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
668   else if (op == MAT_NO_NEW_DIAGONALS)
669     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");}
670   else
671     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
672   return 0;
673 }
674 
675 
676 /* -------------------------------------------------------*/
677 /* Should check that shapes of vectors and matrices match */
678 /* -------------------------------------------------------*/
679 #include "pinclude/plapack.h"
680 
681 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
682 {
683   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
684   Scalar          *xg,*zg;
685   register Scalar *x,*z,*v,sum;
686   int             mbs=a->mbs,i,*idx,*ii,n,ierr;
687 
688   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
689   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
690 
691   idx   = a->j;
692   v     = a->a;
693   ii    = a->i;
694 
695   for ( i=0; i<mbs; i++ ) {
696     n    = ii[1] - ii[0]; ii++;
697     sum  = 0.0;
698     while (n--) sum += *v++ * x[*idx++];
699     z[i] = sum;
700   }
701   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
702   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
703   PLogFlops(2*a->nz - a->m);
704   return 0;
705 }
706 
707 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
708 {
709   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
710   Scalar          *xg,*zg;
711   register Scalar *x,*z,*v,*xb,sum1,sum2;
712   register Scalar x1,x2;
713   int             mbs=a->mbs,i,*idx,*ii;
714   int             j,n,ierr;
715 
716   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
717   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
718 
719   idx   = a->j;
720   v     = a->a;
721   ii    = a->i;
722 
723   for ( i=0; i<mbs; i++ ) {
724     n  = ii[1] - ii[0]; ii++;
725     sum1 = 0.0; sum2 = 0.0;
726     for ( j=0; j<n; j++ ) {
727       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
728       sum1 += v[0]*x1 + v[2]*x2;
729       sum2 += v[1]*x1 + v[3]*x2;
730       v += 4;
731     }
732     z[0] = sum1; z[1] = sum2;
733     z += 2;
734   }
735   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
736   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
737   PLogFlops(4*a->nz - a->m);
738   return 0;
739 }
740 
741 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
742 {
743   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
744   Scalar          *xg,*zg;
745   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
746   int             mbs=a->mbs,i,*idx,*ii,j,n,ierr;
747 
748   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
749   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
750 
751   idx   = a->j;
752   v     = a->a;
753   ii    = a->i;
754 
755   for ( i=0; i<mbs; i++ ) {
756     n  = ii[1] - ii[0]; ii++;
757     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
758     for ( j=0; j<n; j++ ) {
759       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
760       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
761       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
762       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
763       v += 9;
764     }
765     z[0] = sum1; z[1] = sum2; z[2] = sum3;
766     z += 3;
767   }
768   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
769   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
770   PLogFlops(18*a->nz - a->m);
771   return 0;
772 }
773 
774 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
775 {
776   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
777   Scalar          *xg,*zg;
778   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
779   register Scalar x1,x2,x3,x4;
780   int             mbs=a->mbs,i,*idx,*ii;
781   int             j,n,ierr;
782 
783   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
784   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
785 
786   idx   = a->j;
787   v     = a->a;
788   ii    = a->i;
789 
790   for ( i=0; i<mbs; i++ ) {
791     n  = ii[1] - ii[0]; ii++;
792     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
793     for ( j=0; j<n; j++ ) {
794       xb = x + 4*(*idx++);
795       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
796       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
797       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
798       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
799       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
800       v += 16;
801     }
802     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
803     z += 4;
804   }
805   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
806   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
807   PLogFlops(32*a->nz - a->m);
808   return 0;
809 }
810 
811 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
812 {
813   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
814   Scalar          *xg,*zg;
815   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
816   register Scalar x1,x2,x3,x4,x5;
817   int             mbs=a->mbs,i,*idx,*ii,j,n,ierr;
818 
819   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
820   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
821 
822   idx   = a->j;
823   v     = a->a;
824   ii    = a->i;
825 
826   for ( i=0; i<mbs; i++ ) {
827     n  = ii[1] - ii[0]; ii++;
828     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
829     for ( j=0; j<n; j++ ) {
830       xb = x + 5*(*idx++);
831       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
832       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
833       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
834       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
835       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
836       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
837       v += 25;
838     }
839     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
840     z += 5;
841   }
842   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
843   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
844   PLogFlops(50*a->nz - a->m);
845   return 0;
846 }
847 
848 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
849 {
850   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
851   Scalar          *xg,*zg;
852   register Scalar *x,*z,*v,*xb;
853   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
854   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt, _DZero = 0.0;
855 
856   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
857   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
858 
859   idx   = a->j;
860   v     = a->a;
861   ii    = a->i;
862 
863 
864   if (!a->mult_work) {
865     k = PetscMax(a->m,a->n);
866     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
867   }
868   work = a->mult_work;
869   for ( i=0; i<mbs; i++ ) {
870     n     = ii[1] - ii[0]; ii++;
871     ncols = n*bs;
872     workt = work;
873     for ( j=0; j<n; j++ ) {
874       xb = x + bs*(*idx++);
875       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
876       workt += bs;
877     }
878     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
879     v += n*bs2;
880     z += bs;
881   }
882    ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
883   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
884   PLogFlops(2*a->nz*bs2 - a->m);
885   return 0;
886 }
887 
888 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
889 {
890   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
891   Scalar          *xg,*yg,*zg;
892   register Scalar *x,*y,*z,*v,sum;
893   int             mbs=a->mbs,i,*idx,*ii,n,ierr;
894 
895   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
896   ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg;
897   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
898 
899   idx   = a->j;
900   v     = a->a;
901   ii    = a->i;
902 
903   for ( i=0; i<mbs; i++ ) {
904     n    = ii[1] - ii[0]; ii++;
905     sum  = y[i];
906     while (n--) sum += *v++ * x[*idx++];
907     z[i] = sum;
908   }
909   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
910   ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr);
911   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
912   PLogFlops(2*a->nz);
913   return 0;
914 }
915 
916 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
917 {
918   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
919   Scalar          *xg,*yg,*zg;
920   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
921   register Scalar x1,x2;
922   int             mbs=a->mbs,i,*idx,*ii;
923   int             j,n,ierr;
924 
925   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
926   ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg;
927   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
928 
929   idx   = a->j;
930   v     = a->a;
931   ii    = a->i;
932 
933   for ( i=0; i<mbs; i++ ) {
934     n  = ii[1] - ii[0]; ii++;
935     sum1 = y[0]; sum2 = y[1];
936     for ( j=0; j<n; j++ ) {
937       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
938       sum1 += v[0]*x1 + v[2]*x2;
939       sum2 += v[1]*x1 + v[3]*x2;
940       v += 4;
941     }
942     z[0] = sum1; z[1] = sum2;
943     z += 2; y += 2;
944   }
945   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
946   ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr);
947   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
948   PLogFlops(4*a->nz);
949   return 0;
950 }
951 
952 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
953 {
954   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
955   Scalar          *xg,*yg,*zg;
956   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
957   int             mbs=a->mbs,i,*idx,*ii,j,n,ierr;
958 
959   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
960   ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg;
961   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
962 
963   idx   = a->j;
964   v     = a->a;
965   ii    = a->i;
966 
967   for ( i=0; i<mbs; i++ ) {
968     n  = ii[1] - ii[0]; ii++;
969     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
970     for ( j=0; j<n; j++ ) {
971       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
972       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
973       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
974       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
975       v += 9;
976     }
977     z[0] = sum1; z[1] = sum2; z[2] = sum3;
978     z += 3; y += 3;
979   }
980   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
981   ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr);
982   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
983   PLogFlops(18*a->nz);
984   return 0;
985 }
986 
987 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
988 {
989   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
990   Scalar          *xg,*yg,*zg;
991   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
992   register Scalar x1,x2,x3,x4;
993   int             mbs=a->mbs,i,*idx,*ii;
994   int             j,n,ierr;
995 
996   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
997   ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg;
998   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
999 
1000   idx   = a->j;
1001   v     = a->a;
1002   ii    = a->i;
1003 
1004   for ( i=0; i<mbs; i++ ) {
1005     n  = ii[1] - ii[0]; ii++;
1006     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1007     for ( j=0; j<n; j++ ) {
1008       xb = x + 4*(*idx++);
1009       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1010       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1011       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1012       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1013       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1014       v += 16;
1015     }
1016     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1017     z += 4; y += 4;
1018   }
1019   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1020   ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr);
1021   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1022   PLogFlops(32*a->nz);
1023   return 0;
1024 }
1025 
1026 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1027 {
1028   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1029   Scalar          *xg,*yg,*zg;
1030   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1031   register Scalar x1,x2,x3,x4,x5;
1032   int             mbs=a->mbs,i,*idx,*ii,j,n,ierr;
1033 
1034   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1035   ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg;
1036   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1037 
1038   idx   = a->j;
1039   v     = a->a;
1040   ii    = a->i;
1041 
1042   for ( i=0; i<mbs; i++ ) {
1043     n  = ii[1] - ii[0]; ii++;
1044     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1045     for ( j=0; j<n; j++ ) {
1046       xb = x + 5*(*idx++);
1047       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1048       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1049       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1050       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1051       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1052       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1053       v += 25;
1054     }
1055     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1056     z += 5; y += 5;
1057   }
1058   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1059   ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr);
1060   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1061   PLogFlops(50*a->nz);
1062   return 0;
1063 }
1064 
1065 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1066 {
1067   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1068   Scalar          *xg,*zg;
1069   register Scalar *x,*z,*v,*xb;
1070   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1071   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1072 
1073   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1074 
1075   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1076   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1077 
1078   idx   = a->j;
1079   v     = a->a;
1080   ii    = a->i;
1081 
1082 
1083   if (!a->mult_work) {
1084     k = PetscMax(a->m,a->n);
1085     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1086   }
1087   work = a->mult_work;
1088   for ( i=0; i<mbs; i++ ) {
1089     n     = ii[1] - ii[0]; ii++;
1090     ncols = n*bs;
1091     workt = work;
1092     for ( j=0; j<n; j++ ) {
1093       xb = x + bs*(*idx++);
1094       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1095       workt += bs;
1096     }
1097     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1098     v += n*bs2;
1099     z += bs;
1100   }
1101   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1102   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1103   PLogFlops(2*a->nz*bs2 );
1104   return 0;
1105 }
1106 
1107 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1108 {
1109   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1110   Scalar          *xg,*zg,*zb;
1111   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1112   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1113   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1114 
1115 
1116   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1117   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1118   PetscMemzero(z,N*sizeof(Scalar));
1119 
1120   idx   = a->j;
1121   v     = a->a;
1122   ii    = a->i;
1123 
1124   switch (bs) {
1125   case 1:
1126     for ( i=0; i<mbs; i++ ) {
1127       n  = ii[1] - ii[0]; ii++;
1128       xb = x + i; x1 = xb[0];
1129       ib = idx + ai[i];
1130       for ( j=0; j<n; j++ ) {
1131         rval    = ib[j];
1132         z[rval] += *v++ * x1;
1133       }
1134     }
1135     break;
1136   case 2:
1137     for ( i=0; i<mbs; i++ ) {
1138       n  = ii[1] - ii[0]; ii++;
1139       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1140       ib = idx + ai[i];
1141       for ( j=0; j<n; j++ ) {
1142         rval      = ib[j]*2;
1143         z[rval++] += v[0]*x1 + v[1]*x2;
1144         z[rval++] += v[2]*x1 + v[3]*x2;
1145         v += 4;
1146       }
1147     }
1148     break;
1149   case 3:
1150     for ( i=0; i<mbs; i++ ) {
1151       n  = ii[1] - ii[0]; ii++;
1152       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1153       ib = idx + ai[i];
1154       for ( j=0; j<n; j++ ) {
1155         rval      = ib[j]*3;
1156         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1157         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1158         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1159         v += 9;
1160       }
1161     }
1162     break;
1163   case 4:
1164     for ( i=0; i<mbs; i++ ) {
1165       n  = ii[1] - ii[0]; ii++;
1166       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1167       ib = idx + ai[i];
1168       for ( j=0; j<n; j++ ) {
1169         rval      = ib[j]*4;
1170         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1171         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1172         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1173         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1174         v += 16;
1175       }
1176     }
1177     break;
1178   case 5:
1179     for ( i=0; i<mbs; i++ ) {
1180       n  = ii[1] - ii[0]; ii++;
1181       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1182       x4 = xb[3];   x5 = xb[4];
1183       ib = idx + ai[i];
1184       for ( j=0; j<n; j++ ) {
1185         rval      = ib[j]*5;
1186         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1187         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1188         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1189         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1190         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1191         v += 25;
1192       }
1193     }
1194     break;
1195       /* block sizes larger then 5 by 5 are handled by BLAS */
1196     default: {
1197       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1198       if (!a->mult_work) {
1199         k = PetscMax(a->m,a->n);
1200         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1201         CHKPTRQ(a->mult_work);
1202       }
1203       work = a->mult_work;
1204       for ( i=0; i<mbs; i++ ) {
1205         n     = ii[1] - ii[0]; ii++;
1206         ncols = n*bs;
1207         PetscMemzero(work,ncols*sizeof(Scalar));
1208         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1209         v += n*bs2;
1210         x += bs;
1211         workt = work;
1212         for ( j=0; j<n; j++ ) {
1213           zb = z + bs*(*idx++);
1214           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1215           workt += bs;
1216         }
1217       }
1218     }
1219   }
1220   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1221   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1222   PLogFlops(2*a->nz*a->bs2 - a->n);
1223   return 0;
1224 }
1225 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1226 {
1227   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1228   Scalar          *xg,*zg,*zb;
1229   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1230   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1231   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1232 
1233 
1234 
1235   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1236   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1237 
1238   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1239   else PetscMemzero(z,N*sizeof(Scalar));
1240 
1241 
1242   idx   = a->j;
1243   v     = a->a;
1244   ii    = a->i;
1245 
1246   switch (bs) {
1247   case 1:
1248     for ( i=0; i<mbs; i++ ) {
1249       n  = ii[1] - ii[0]; ii++;
1250       xb = x + i; x1 = xb[0];
1251       ib = idx + ai[i];
1252       for ( j=0; j<n; j++ ) {
1253         rval    = ib[j];
1254         z[rval] += *v++ * x1;
1255       }
1256     }
1257     break;
1258   case 2:
1259     for ( i=0; i<mbs; i++ ) {
1260       n  = ii[1] - ii[0]; ii++;
1261       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1262       ib = idx + ai[i];
1263       for ( j=0; j<n; j++ ) {
1264         rval      = ib[j]*2;
1265         z[rval++] += v[0]*x1 + v[1]*x2;
1266         z[rval++] += v[2]*x1 + v[3]*x2;
1267         v += 4;
1268       }
1269     }
1270     break;
1271   case 3:
1272     for ( i=0; i<mbs; i++ ) {
1273       n  = ii[1] - ii[0]; ii++;
1274       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1275       ib = idx + ai[i];
1276       for ( j=0; j<n; j++ ) {
1277         rval      = ib[j]*3;
1278         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1279         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1280         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1281         v += 9;
1282       }
1283     }
1284     break;
1285   case 4:
1286     for ( i=0; i<mbs; i++ ) {
1287       n  = ii[1] - ii[0]; ii++;
1288       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1289       ib = idx + ai[i];
1290       for ( j=0; j<n; j++ ) {
1291         rval      = ib[j]*4;
1292         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1293         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1294         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1295         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1296         v += 16;
1297       }
1298     }
1299     break;
1300   case 5:
1301     for ( i=0; i<mbs; i++ ) {
1302       n  = ii[1] - ii[0]; ii++;
1303       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1304       x4 = xb[3];   x5 = xb[4];
1305       ib = idx + ai[i];
1306       for ( j=0; j<n; j++ ) {
1307         rval      = ib[j]*5;
1308         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1309         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1310         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1311         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1312         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1313         v += 25;
1314       }
1315     }
1316     break;
1317       /* block sizes larger then 5 by 5 are handled by BLAS */
1318     default: {
1319       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1320       if (!a->mult_work) {
1321         k = PetscMax(a->m,a->n);
1322         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1323         CHKPTRQ(a->mult_work);
1324       }
1325       work = a->mult_work;
1326       for ( i=0; i<mbs; i++ ) {
1327         n     = ii[1] - ii[0]; ii++;
1328         ncols = n*bs;
1329         PetscMemzero(work,ncols*sizeof(Scalar));
1330         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1331         v += n*bs2;
1332         x += bs;
1333         workt = work;
1334         for ( j=0; j<n; j++ ) {
1335           zb = z + bs*(*idx++);
1336           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1337           workt += bs;
1338         }
1339       }
1340     }
1341   }
1342   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1343   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1344   PLogFlops(2*a->nz*a->bs2);
1345   return 0;
1346 }
1347 
1348 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
1349 {
1350   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1351   if (nz)  *nz  = a->bs2*a->nz;
1352   if (nza) *nza = a->maxnz;
1353   if (mem) *mem = (int)A->mem;
1354   return 0;
1355 }
1356 
1357 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1358 {
1359   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1360 
1361   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
1362 
1363   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1364   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1365       (a->nz != b->nz)) {
1366     *flg = PETSC_FALSE; return 0;
1367   }
1368 
1369   /* if the a->i are the same */
1370   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1371     *flg = PETSC_FALSE; return 0;
1372   }
1373 
1374   /* if a->j are the same */
1375   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1376     *flg = PETSC_FALSE; return 0;
1377   }
1378 
1379   /* if a->a are the same */
1380   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1381     *flg = PETSC_FALSE; return 0;
1382   }
1383   *flg = PETSC_TRUE;
1384   return 0;
1385 
1386 }
1387 
1388 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1389 {
1390   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1391   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1392   Scalar      *x, zero = 0.0,*aa,*aa_j;
1393 
1394   bs   = a->bs;
1395   aa   = a->a;
1396   ai   = a->i;
1397   aj   = a->j;
1398   ambs = a->mbs;
1399   bs2  = a->bs2;
1400 
1401   VecSet(&zero,v);
1402   VecGetArray(v,&x); VecGetLocalSize(v,&n);
1403   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
1404   for ( i=0; i<ambs; i++ ) {
1405     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1406       if (aj[j] == i) {
1407         row  = i*bs;
1408         aa_j = aa+j*bs2;
1409         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1410         break;
1411       }
1412     }
1413   }
1414   return 0;
1415 }
1416 
1417 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1418 {
1419   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1420   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1421   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1422 
1423   ai  = a->i;
1424   aj  = a->j;
1425   aa  = a->a;
1426   m   = a->m;
1427   n   = a->n;
1428   bs  = a->bs;
1429   mbs = a->mbs;
1430   bs2 = a->bs2;
1431   if (ll) {
1432     VecGetArray(ll,&l); VecGetSize(ll,&lm);
1433     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
1434     for ( i=0; i<mbs; i++ ) { /* for each block row */
1435       M  = ai[i+1] - ai[i];
1436       li = l + i*bs;
1437       v  = aa + bs2*ai[i];
1438       for ( j=0; j<M; j++ ) { /* for each block */
1439         for ( k=0; k<bs2; k++ ) {
1440           (*v++) *= li[k%bs];
1441         }
1442       }
1443     }
1444   }
1445 
1446   if (rr) {
1447     VecGetArray(rr,&r); VecGetSize(rr,&rn);
1448     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
1449     for ( i=0; i<mbs; i++ ) { /* for each block row */
1450       M  = ai[i+1] - ai[i];
1451       v  = aa + bs2*ai[i];
1452       for ( j=0; j<M; j++ ) { /* for each block */
1453         ri = r + bs*aj[ai[i]+j];
1454         for ( k=0; k<bs; k++ ) {
1455           x = ri[k];
1456           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1457         }
1458       }
1459     }
1460   }
1461   return 0;
1462 }
1463 
1464 
1465 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1466 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1467 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1468 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1469 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1470 
1471 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1472 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1473 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1474 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1475 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1476 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1477 
1478 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1479 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1480 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1481 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1482 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1483 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1484 
1485 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1486 {
1487   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1488   Scalar      *v = a->a;
1489   double      sum = 0.0;
1490   int         i,nz=a->nz,bs2=a->bs2;
1491 
1492   if (type == NORM_FROBENIUS) {
1493     for (i=0; i< bs2*nz; i++ ) {
1494 #if defined(PETSC_COMPLEX)
1495       sum += real(conj(*v)*(*v)); v++;
1496 #else
1497       sum += (*v)*(*v); v++;
1498 #endif
1499     }
1500     *norm = sqrt(sum);
1501   }
1502   else {
1503     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
1504   }
1505   return 0;
1506 }
1507 
1508 /*
1509      note: This can only work for identity for row and col. It would
1510    be good to check this and otherwise generate an error.
1511 */
1512 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1513 {
1514   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1515   Mat         outA;
1516   int         ierr;
1517 
1518   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
1519 
1520   outA          = inA;
1521   inA->factor   = FACTOR_LU;
1522   a->row        = row;
1523   a->col        = col;
1524 
1525   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1526 
1527   if (!a->diag) {
1528     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1529   }
1530   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1531   return 0;
1532 }
1533 
1534 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1535 {
1536   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1537   int         one = 1, totalnz = a->bs2*a->nz;
1538   BLscal_( &totalnz, alpha, a->a, &one );
1539   PLogFlops(totalnz);
1540   return 0;
1541 }
1542 
1543 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1544 {
1545   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1546   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1547   int        *ai = a->i, *ailen = a->ilen;
1548   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1549   Scalar     *ap, *aa = a->a, zero = 0.0;
1550 
1551   for ( k=0; k<m; k++ ) { /* loop over rows */
1552     row  = im[k]; brow = row/bs;
1553     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
1554     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1555     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1556     nrow = ailen[brow];
1557     for ( l=0; l<n; l++ ) { /* loop over columns */
1558       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1559       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1560       col  = in[l] ;
1561       bcol = col/bs;
1562       cidx = col%bs;
1563       ridx = row%bs;
1564       high = nrow;
1565       low  = 0; /* assume unsorted */
1566       while (high-low > 5) {
1567         t = (low+high)/2;
1568         if (rp[t] > bcol) high = t;
1569         else             low  = t;
1570       }
1571       for ( i=low; i<high; i++ ) {
1572         if (rp[i] > bcol) break;
1573         if (rp[i] == bcol) {
1574           *v++ = ap[bs2*i+bs*cidx+ridx];
1575           goto finished;
1576         }
1577       }
1578       *v++ = zero;
1579       finished:;
1580     }
1581   }
1582   return 0;
1583 }
1584 
1585 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1586 {
1587   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1588   *bs = baij->bs;
1589   return 0;
1590 }
1591 
1592 /* idx should be of length atlease bs */
1593 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1594 {
1595   int i,row;
1596   row = idx[0];
1597   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1598 
1599   for ( i=1; i<bs; i++ ) {
1600     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1601   }
1602   *flg = PETSC_TRUE;
1603   return 0;
1604 }
1605 
1606 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1607 {
1608   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1609   IS          is_local;
1610   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1611   PetscTruth  flg;
1612   Scalar      zero = 0.0,*aa;
1613 
1614   /* Make a copy of the IS and  sort it */
1615   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1616   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1617   ierr = ISCreateSeq(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1618   ierr = ISSort(is_local); CHKERRQ(ierr);
1619   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1620 
1621   i = 0;
1622   while (i < is_n) {
1623     if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range");
1624     flg = PETSC_FALSE;
1625     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1626     if (flg) { /* There exists a block of rows to be Zerowed */
1627       baij->ilen[rows[i]/bs] = 0;
1628       i += bs;
1629     } else { /* Zero out only the requested row */
1630       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1631       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1632       for ( j=0; j<count; j++ ) {
1633         aa[0] = zero;
1634         aa+=bs;
1635       }
1636       i++;
1637     }
1638   }
1639   if (diag) {
1640     for ( j=0; j<is_n; j++ ) {
1641       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1642     }
1643   }
1644   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1645   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1646   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1647   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1648   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1649 
1650   return 0;
1651 }
1652 int MatPrintHelp_SeqBAIJ(Mat A)
1653 {
1654   static int called = 0;
1655   MPI_Comm   comm = A->comm;
1656 
1657   if (called) return 0; else called = 1;
1658   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1659   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1660   return 0;
1661 }
1662 
1663 /* -------------------------------------------------------------------*/
1664 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1665        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1666        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1667        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1668        MatSolve_SeqBAIJ_N,0,
1669        0,0,
1670        MatLUFactor_SeqBAIJ,0,
1671        0,
1672        MatTranspose_SeqBAIJ,
1673        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1674        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1675        0,MatAssemblyEnd_SeqBAIJ,
1676        0,
1677        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
1678        MatGetReordering_SeqBAIJ,
1679        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1680        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1681        MatILUFactorSymbolic_SeqBAIJ,0,
1682        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1683        MatGetSubMatrix_SeqBAIJ,0,
1684        MatConvertSameType_SeqBAIJ,0,0,
1685        MatILUFactor_SeqBAIJ,0,0,
1686        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
1687        MatGetValues_SeqBAIJ,0,
1688        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1689        0,0,0,MatGetBlockSize_SeqBAIJ};
1690 
1691 /*@C
1692    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1693    compressed row) format.  For good matrix assembly performance the
1694    user should preallocate the matrix storage by setting the parameter nz
1695    (or the array nzz).  By setting these parameters accurately, performance
1696    during matrix assembly can be increased by more than a factor of 50.
1697 
1698    Input Parameters:
1699 .  comm - MPI communicator, set to MPI_COMM_SELF
1700 .  bs - size of block
1701 .  m - number of rows
1702 .  n - number of columns
1703 .  nz - number of block nonzeros per block row (same for all rows)
1704 .  nzz - array containing the number of block nonzeros in the various block rows
1705          (possibly different for each block row) or PETSC_NULL
1706 
1707    Output Parameter:
1708 .  A - the matrix
1709 
1710    Notes:
1711    The block AIJ format is fully compatible with standard Fortran 77
1712    storage.  That is, the stored row and column indices can begin at
1713    either one (as in Fortran) or zero.  See the users' manual for details.
1714 
1715    Specify the preallocated storage with either nz or nnz (not both).
1716    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1717    allocation.  For additional details, see the users manual chapter on
1718    matrices and the file $(PETSC_DIR)/Performance.
1719 
1720 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1721 @*/
1722 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1723 {
1724   Mat         B;
1725   Mat_SeqBAIJ *b;
1726   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1727 
1728   if (mbs*bs!=m || nbs*bs!=n)
1729     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1730 
1731   *A = 0;
1732   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1733   PLogObjectCreate(B);
1734   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1735   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1736   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1737   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1738   if (!flg) {
1739     switch (bs) {
1740       case 1:
1741         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1742         B->ops.solve           = MatSolve_SeqBAIJ_1;
1743         B->ops.mult            = MatMult_SeqBAIJ_1;
1744         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
1745        break;
1746       case 2:
1747         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1748         B->ops.solve           = MatSolve_SeqBAIJ_2;
1749         B->ops.mult            = MatMult_SeqBAIJ_2;
1750         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
1751         break;
1752       case 3:
1753         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1754         B->ops.solve           = MatSolve_SeqBAIJ_3;
1755         B->ops.mult            = MatMult_SeqBAIJ_3;
1756         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
1757         break;
1758       case 4:
1759         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1760         B->ops.solve           = MatSolve_SeqBAIJ_4;
1761         B->ops.mult            = MatMult_SeqBAIJ_4;
1762         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
1763         break;
1764       case 5:
1765         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1766         B->ops.solve           = MatSolve_SeqBAIJ_5;
1767         B->ops.mult            = MatMult_SeqBAIJ_5;
1768         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
1769         break;
1770     }
1771   }
1772   B->destroy          = MatDestroy_SeqBAIJ;
1773   B->view             = MatView_SeqBAIJ;
1774   B->factor           = 0;
1775   B->lupivotthreshold = 1.0;
1776   b->row              = 0;
1777   b->col              = 0;
1778   b->reallocs         = 0;
1779 
1780   b->m       = m; B->m = m; B->M = m;
1781   b->n       = n; B->n = n; B->N = n;
1782   b->mbs     = mbs;
1783   b->nbs     = nbs;
1784   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1785   if (nnz == PETSC_NULL) {
1786     if (nz == PETSC_DEFAULT) nz = 5;
1787     else if (nz <= 0)        nz = 1;
1788     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1789     nz = nz*mbs;
1790   }
1791   else {
1792     nz = 0;
1793     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1794   }
1795 
1796   /* allocate the matrix space */
1797   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1798   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1799   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1800   b->j  = (int *) (b->a + nz*bs2);
1801   PetscMemzero(b->j,nz*sizeof(int));
1802   b->i  = b->j + nz;
1803   b->singlemalloc = 1;
1804 
1805   b->i[0] = 0;
1806   for (i=1; i<mbs+1; i++) {
1807     b->i[i] = b->i[i-1] + b->imax[i-1];
1808   }
1809 
1810   /* b->ilen will count nonzeros in each block row so far. */
1811   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1812   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1813   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1814 
1815   b->bs               = bs;
1816   b->bs2              = bs2;
1817   b->mbs              = mbs;
1818   b->nz               = 0;
1819   b->maxnz            = nz*bs2;
1820   b->sorted           = 0;
1821   b->roworiented      = 1;
1822   b->nonew            = 0;
1823   b->diag             = 0;
1824   b->solve_work       = 0;
1825   b->mult_work        = 0;
1826   b->spptr            = 0;
1827   *A = B;
1828   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1829   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1830   return 0;
1831 }
1832 
1833 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1834 {
1835   Mat         C;
1836   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1837   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1838 
1839   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1840 
1841   *B = 0;
1842   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1843   PLogObjectCreate(C);
1844   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1845   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1846   C->destroy    = MatDestroy_SeqBAIJ;
1847   C->view       = MatView_SeqBAIJ;
1848   C->factor     = A->factor;
1849   c->row        = 0;
1850   c->col        = 0;
1851   C->assembled  = PETSC_TRUE;
1852 
1853   c->m = C->m   = a->m;
1854   c->n = C->n   = a->n;
1855   C->M          = a->m;
1856   C->N          = a->n;
1857 
1858   c->bs         = a->bs;
1859   c->bs2        = a->bs2;
1860   c->mbs        = a->mbs;
1861   c->nbs        = a->nbs;
1862 
1863   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1864   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1865   for ( i=0; i<mbs; i++ ) {
1866     c->imax[i] = a->imax[i];
1867     c->ilen[i] = a->ilen[i];
1868   }
1869 
1870   /* allocate the matrix space */
1871   c->singlemalloc = 1;
1872   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1873   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1874   c->j  = (int *) (c->a + nz*bs2);
1875   c->i  = c->j + nz;
1876   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1877   if (mbs > 0) {
1878     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1879     if (cpvalues == COPY_VALUES) {
1880       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1881     }
1882   }
1883 
1884   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1885   c->sorted      = a->sorted;
1886   c->roworiented = a->roworiented;
1887   c->nonew       = a->nonew;
1888 
1889   if (a->diag) {
1890     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1891     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1892     for ( i=0; i<mbs; i++ ) {
1893       c->diag[i] = a->diag[i];
1894     }
1895   }
1896   else c->diag          = 0;
1897   c->nz                 = a->nz;
1898   c->maxnz              = a->maxnz;
1899   c->solve_work         = 0;
1900   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1901   c->mult_work          = 0;
1902   *B = C;
1903   return 0;
1904 }
1905 
1906 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1907 {
1908   Mat_SeqBAIJ  *a;
1909   Mat          B;
1910   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1911   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1912   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1913   int          *masked, nmask,tmp,bs2,ishift;
1914   Scalar       *aa;
1915   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1916 
1917   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1918   bs2  = bs*bs;
1919 
1920   MPI_Comm_size(comm,&size);
1921   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1922   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1923   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1924   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1925   M = header[1]; N = header[2]; nz = header[3];
1926 
1927   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1928 
1929   /*
1930      This code adds extra rows to make sure the number of rows is
1931     divisible by the blocksize
1932   */
1933   mbs        = M/bs;
1934   extra_rows = bs - M + bs*(mbs);
1935   if (extra_rows == bs) extra_rows = 0;
1936   else                  mbs++;
1937   if (extra_rows) {
1938     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
1939   }
1940 
1941   /* read in row lengths */
1942   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1943   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1944   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1945 
1946   /* read in column indices */
1947   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1948   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1949   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1950 
1951   /* loop over row lengths determining block row lengths */
1952   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1953   PetscMemzero(browlengths,mbs*sizeof(int));
1954   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1955   PetscMemzero(mask,mbs*sizeof(int));
1956   masked = mask + mbs;
1957   rowcount = 0; nzcount = 0;
1958   for ( i=0; i<mbs; i++ ) {
1959     nmask = 0;
1960     for ( j=0; j<bs; j++ ) {
1961       kmax = rowlengths[rowcount];
1962       for ( k=0; k<kmax; k++ ) {
1963         tmp = jj[nzcount++]/bs;
1964         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1965       }
1966       rowcount++;
1967     }
1968     browlengths[i] += nmask;
1969     /* zero out the mask elements we set */
1970     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1971   }
1972 
1973   /* create our matrix */
1974   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1975          CHKERRQ(ierr);
1976   B = *A;
1977   a = (Mat_SeqBAIJ *) B->data;
1978 
1979   /* set matrix "i" values */
1980   a->i[0] = 0;
1981   for ( i=1; i<= mbs; i++ ) {
1982     a->i[i]      = a->i[i-1] + browlengths[i-1];
1983     a->ilen[i-1] = browlengths[i-1];
1984   }
1985   a->nz         = 0;
1986   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1987 
1988   /* read in nonzero values */
1989   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1990   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
1991   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1992 
1993   /* set "a" and "j" values into matrix */
1994   nzcount = 0; jcount = 0;
1995   for ( i=0; i<mbs; i++ ) {
1996     nzcountb = nzcount;
1997     nmask    = 0;
1998     for ( j=0; j<bs; j++ ) {
1999       kmax = rowlengths[i*bs+j];
2000       for ( k=0; k<kmax; k++ ) {
2001         tmp = jj[nzcount++]/bs;
2002 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2003       }
2004       rowcount++;
2005     }
2006     /* sort the masked values */
2007     PetscSortInt(nmask,masked);
2008 
2009     /* set "j" values into matrix */
2010     maskcount = 1;
2011     for ( j=0; j<nmask; j++ ) {
2012       a->j[jcount++]  = masked[j];
2013       mask[masked[j]] = maskcount++;
2014     }
2015     /* set "a" values into matrix */
2016     ishift = bs2*a->i[i];
2017     for ( j=0; j<bs; j++ ) {
2018       kmax = rowlengths[i*bs+j];
2019       for ( k=0; k<kmax; k++ ) {
2020         tmp    = jj[nzcountb]/bs ;
2021         block  = mask[tmp] - 1;
2022         point  = jj[nzcountb] - bs*tmp;
2023         idx    = ishift + bs2*block + j + bs*point;
2024         a->a[idx] = aa[nzcountb++];
2025       }
2026     }
2027     /* zero out the mask elements we set */
2028     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2029   }
2030   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
2031 
2032   PetscFree(rowlengths);
2033   PetscFree(browlengths);
2034   PetscFree(aa);
2035   PetscFree(jj);
2036   PetscFree(mask);
2037 
2038   B->assembled = PETSC_TRUE;
2039 
2040   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2041   if (flg) {
2042     Viewer tviewer;
2043     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2044     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
2045     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2046     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2047   }
2048   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2049   if (flg) {
2050     Viewer tviewer;
2051     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2052     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
2053     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2054     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2055   }
2056   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2057   if (flg) {
2058     Viewer tviewer;
2059     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2060     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2061     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2062   }
2063   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2064   if (flg) {
2065     Viewer tviewer;
2066     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2067     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
2068     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2069     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2070   }
2071   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2072   if (flg) {
2073     Viewer tviewer;
2074     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
2075     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
2076     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
2077     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2078   }
2079   return 0;
2080 }
2081 
2082 
2083 
2084