xref: /petsc/src/mat/impls/baij/seq/baij.c (revision d402145b019fbc790513a600f7ded77b5b6798cb)
1 #ifndef lint
2 static char vcid[] = "$Id: baij.c,v 1.83 1997/01/22 18:43:37 bsmith Exp bsmith $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "src/mat/impls/baij/seq/baij.h"
10 #include "src/vec/vecimpl.h"
11 #include "src/inline/spops.h"
12 #include "petsc.h"
13 
14 
15 /*
16      Adds diagonal pointers to sparse matrix structure.
17 */
18 
19 #undef __FUNC__
20 #define __FUNC__ "MatMarkDiag_SeqBAIJ"
21 int MatMarkDiag_SeqBAIJ(Mat A)
22 {
23   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
24   int         i,j, *diag, m = a->mbs;
25 
26   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
27   PLogObjectMemory(A,(m+1)*sizeof(int));
28   for ( i=0; i<m; i++ ) {
29     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
30       if (a->j[j] == i) {
31         diag[i] = j;
32         break;
33       }
34     }
35   }
36   a->diag = diag;
37   return 0;
38 }
39 
40 #include "draw.h"
41 #include "pinclude/pviewer.h"
42 #include "sys.h"
43 
44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
45 
46 #undef __FUNC__
47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
49                             PetscTruth *done)
50 {
51   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
52   int         ierr, n = a->mbs,i;
53 
54   *nn = n;
55   if (!ia) return 0;
56   if (symmetric) {
57     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
58   } else if (oshift == 1) {
59     /* temporarily add 1 to i and j indices */
60     int nz = a->i[n] + 1;
61     for ( i=0; i<nz; i++ ) a->j[i]++;
62     for ( i=0; i<n+1; i++ ) a->i[i]++;
63     *ia = a->i; *ja = a->j;
64   } else {
65     *ia = a->i; *ja = a->j;
66   }
67 
68   return 0;
69 }
70 
71 #undef __FUNC__
72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
74                                 PetscTruth *done)
75 {
76   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
77   int         i,n = a->mbs;
78 
79   if (!ia) return 0;
80   if (symmetric) {
81     PetscFree(*ia);
82     PetscFree(*ja);
83   } else if (oshift == 1) {
84     int nz = a->i[n];
85     for ( i=0; i<nz; i++ ) a->j[i]--;
86     for ( i=0; i<n+1; i++ ) a->i[i]--;
87   }
88   return 0;
89 }
90 
91 
92 #undef __FUNC__
93 #define __FUNC__ "MatView_SeqBAIJ_Binary"
94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
95 {
96   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
97   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
98   Scalar      *aa;
99 
100   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
101   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
102   col_lens[0] = MAT_COOKIE;
103 
104   col_lens[1] = a->m;
105   col_lens[2] = a->n;
106   col_lens[3] = a->nz*bs2;
107 
108   /* store lengths of each row and write (including header) to file */
109   count = 0;
110   for ( i=0; i<a->mbs; i++ ) {
111     for ( j=0; j<bs; j++ ) {
112       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
113     }
114   }
115   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
116   PetscFree(col_lens);
117 
118   /* store column indices (zero start index) */
119   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
120   count = 0;
121   for ( i=0; i<a->mbs; i++ ) {
122     for ( j=0; j<bs; j++ ) {
123       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
124         for ( l=0; l<bs; l++ ) {
125           jj[count++] = bs*a->j[k] + l;
126         }
127       }
128     }
129   }
130   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
131   PetscFree(jj);
132 
133   /* store nonzero values */
134   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
135   count = 0;
136   for ( i=0; i<a->mbs; i++ ) {
137     for ( j=0; j<bs; j++ ) {
138       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
139         for ( l=0; l<bs; l++ ) {
140           aa[count++] = a->a[bs2*k + l*bs + j];
141         }
142       }
143     }
144   }
145   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
146   PetscFree(aa);
147   return 0;
148 }
149 
150 #undef __FUNC__
151 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
153 {
154   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
155   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
156   FILE        *fd;
157   char        *outputname;
158 
159   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
160   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
161   ierr = ViewerGetFormat(viewer,&format);
162   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
163     fprintf(fd,"  block size is %d\n",bs);
164   }
165   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
166     SETERRQ(1,0,"Matlab format not supported");
167   }
168   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
169     for ( i=0; i<a->mbs; i++ ) {
170       for ( j=0; j<bs; j++ ) {
171         fprintf(fd,"row %d:",i*bs+j);
172         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
173           for ( l=0; l<bs; l++ ) {
174 #if defined(PETSC_COMPLEX)
175           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
176             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
177               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
178           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
179             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
180 #else
181           if (a->a[bs2*k + l*bs + j] != 0.0)
182             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
183 #endif
184           }
185         }
186         fprintf(fd,"\n");
187       }
188     }
189   }
190   else {
191     for ( i=0; i<a->mbs; i++ ) {
192       for ( j=0; j<bs; j++ ) {
193         fprintf(fd,"row %d:",i*bs+j);
194         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
195           for ( l=0; l<bs; l++ ) {
196 #if defined(PETSC_COMPLEX)
197           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
198             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
199               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
200           }
201           else {
202             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
203           }
204 #else
205             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
206 #endif
207           }
208         }
209         fprintf(fd,"\n");
210       }
211     }
212   }
213   fflush(fd);
214   return 0;
215 }
216 
217 #undef __FUNC__
218 #define __FUNC__ "MatView_SeqBAIJ_Draw"
219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
220 {
221   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
222   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
223   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
224   Scalar       *aa;
225   Draw         draw;
226   DrawButton   button;
227   PetscTruth   isnull;
228 
229   ViewerDrawGetDraw(viewer,&draw);
230   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
231 
232   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
233   xr += w;    yr += h;  xl = -w;     yl = -h;
234   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
235   /* loop over matrix elements drawing boxes */
236   color = DRAW_BLUE;
237   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
238     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
239       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
240       x_l = a->j[j]*bs; x_r = x_l + 1.0;
241       aa = a->a + j*bs2;
242       for ( k=0; k<bs; k++ ) {
243         for ( l=0; l<bs; l++ ) {
244           if (PetscReal(*aa++) >=  0.) continue;
245           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
246         }
247       }
248     }
249   }
250   color = DRAW_CYAN;
251   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
252     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
253       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
254       x_l = a->j[j]*bs; x_r = x_l + 1.0;
255       aa = a->a + j*bs2;
256       for ( k=0; k<bs; k++ ) {
257         for ( l=0; l<bs; l++ ) {
258           if (PetscReal(*aa++) != 0.) continue;
259           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
260         }
261       }
262     }
263   }
264 
265   color = DRAW_RED;
266   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
267     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
268       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
269       x_l = a->j[j]*bs; x_r = x_l + 1.0;
270       aa = a->a + j*bs2;
271       for ( k=0; k<bs; k++ ) {
272         for ( l=0; l<bs; l++ ) {
273           if (PetscReal(*aa++) <= 0.) continue;
274           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
275         }
276       }
277     }
278   }
279 
280   DrawFlush(draw);
281   DrawGetPause(draw,&pause);
282   if (pause >= 0) { PetscSleep(pause); return 0;}
283 
284   /* allow the matrix to zoom or shrink */
285   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
286   while (button != BUTTON_RIGHT) {
287     DrawClear(draw);
288     if (button == BUTTON_LEFT) scale = .5;
289     else if (button == BUTTON_CENTER) scale = 2.;
290     xl = scale*(xl + w - xc) + xc - w*scale;
291     xr = scale*(xr - w - xc) + xc + w*scale;
292     yl = scale*(yl + h - yc) + yc - h*scale;
293     yr = scale*(yr - h - yc) + yc + h*scale;
294     w *= scale; h *= scale;
295     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
296     color = DRAW_BLUE;
297     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
298       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
299         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
300         x_l = a->j[j]*bs; x_r = x_l + 1.0;
301         aa = a->a + j*bs2;
302         for ( k=0; k<bs; k++ ) {
303           for ( l=0; l<bs; l++ ) {
304             if (PetscReal(*aa++) >=  0.) continue;
305             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
306           }
307         }
308       }
309     }
310     color = DRAW_CYAN;
311     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
312       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
313         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
314         x_l = a->j[j]*bs; x_r = x_l + 1.0;
315         aa = a->a + j*bs2;
316         for ( k=0; k<bs; k++ ) {
317           for ( l=0; l<bs; l++ ) {
318           if (PetscReal(*aa++) != 0.) continue;
319           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
320           }
321         }
322       }
323     }
324 
325     color = DRAW_RED;
326     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
327       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
328         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
329         x_l = a->j[j]*bs; x_r = x_l + 1.0;
330         aa = a->a + j*bs2;
331         for ( k=0; k<bs; k++ ) {
332           for ( l=0; l<bs; l++ ) {
333             if (PetscReal(*aa++) <= 0.) continue;
334             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
335           }
336         }
337       }
338     }
339     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
340   }
341   return 0;
342 }
343 
344 #undef __FUNC__
345 #define __FUNC__ "MatView_SeqBAIJ"
346 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
347 {
348   Mat         A = (Mat) obj;
349   ViewerType  vtype;
350   int         ierr;
351 
352   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
353   if (vtype == MATLAB_VIEWER) {
354     SETERRQ(1,0,"Matlab viewer not supported");
355   }
356   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
357     return MatView_SeqBAIJ_ASCII(A,viewer);
358   }
359   else if (vtype == BINARY_FILE_VIEWER) {
360     return MatView_SeqBAIJ_Binary(A,viewer);
361   }
362   else if (vtype == DRAW_VIEWER) {
363     return MatView_SeqBAIJ_Draw(A,viewer);
364   }
365   return 0;
366 }
367 
368 #define CHUNKSIZE  10
369 
370 #undef __FUNC__
371 #define __FUNC__ "MatSetValues_SeqBAIJ"
372 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
373 {
374   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
375   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
376   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
377   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
378   int          ridx,cidx,bs2=a->bs2;
379   Scalar      *ap,value,*aa=a->a,*bap;
380 
381   for ( k=0; k<m; k++ ) { /* loop over added rows */
382     row  = im[k]; brow = row/bs;
383 #if defined(PETSC_BOPT_g)
384     if (row < 0) SETERRQ(1,0,"Negative row");
385     if (row >= a->m) SETERRQ(1,0,"Row too large");
386 #endif
387     rp   = aj + ai[brow];
388     ap   = aa + bs2*ai[brow];
389     rmax = imax[brow];
390     nrow = ailen[brow];
391     low  = 0;
392     for ( l=0; l<n; l++ ) { /* loop over added columns */
393 #if defined(PETSC_BOPT_g)
394       if (in[l] < 0) SETERRQ(1,0,"Negative column");
395       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
396 #endif
397       col = in[l]; bcol = col/bs;
398       ridx = row % bs; cidx = col % bs;
399       if (roworiented) {
400         value = *v++;
401       }
402       else {
403         value = v[k + l*m];
404       }
405       if (!sorted) low = 0; high = nrow;
406       while (high-low > 7) {
407         t = (low+high)/2;
408         if (rp[t] > bcol) high = t;
409         else              low  = t;
410       }
411       for ( i=low; i<high; i++ ) {
412         if (rp[i] > bcol) break;
413         if (rp[i] == bcol) {
414           bap  = ap +  bs2*i + bs*cidx + ridx;
415           if (is == ADD_VALUES) *bap += value;
416           else                  *bap  = value;
417           goto noinsert;
418         }
419       }
420       if (nonew) goto noinsert;
421       if (nrow >= rmax) {
422         /* there is no extra room in row, therefore enlarge */
423         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
424         Scalar *new_a;
425 
426         /* malloc new storage space */
427         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
428         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
429         new_j   = (int *) (new_a + bs2*new_nz);
430         new_i   = new_j + new_nz;
431 
432         /* copy over old data into new slots */
433         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
434         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
435         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
436         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
437         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
438                                                            len*sizeof(int));
439         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
440         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
441         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
442                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
443         /* free up old matrix storage */
444         PetscFree(a->a);
445         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
446         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
447         a->singlemalloc = 1;
448 
449         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
450         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
451         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
452         a->maxnz += bs2*CHUNKSIZE;
453         a->reallocs++;
454         a->nz++;
455       }
456       N = nrow++ - 1;
457       /* shift up all the later entries in this row */
458       for ( ii=N; ii>=i; ii-- ) {
459         rp[ii+1] = rp[ii];
460         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
461       }
462       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
463       rp[i]                      = bcol;
464       ap[bs2*i + bs*cidx + ridx] = value;
465       noinsert:;
466       low = i;
467     }
468     ailen[brow] = nrow;
469   }
470   return 0;
471 }
472 
473 #undef __FUNC__
474 #define __FUNC__ "MatGetSize_SeqBAIJ"
475 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
476 {
477   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
478   *m = a->m; *n = a->n;
479   return 0;
480 }
481 
482 #undef __FUNC__
483 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
484 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
485 {
486   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
487   *m = 0; *n = a->m;
488   return 0;
489 }
490 
491 #undef __FUNC__
492 #define __FUNC__ "MatGetRow_SeqBAIJ"
493 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
494 {
495   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
496   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
497   Scalar      *aa,*v_i,*aa_i;
498 
499   bs  = a->bs;
500   ai  = a->i;
501   aj  = a->j;
502   aa  = a->a;
503   bs2 = a->bs2;
504 
505   if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range");
506 
507   bn  = row/bs;   /* Block number */
508   bp  = row % bs; /* Block Position */
509   M   = ai[bn+1] - ai[bn];
510   *nz = bs*M;
511 
512   if (v) {
513     *v = 0;
514     if (*nz) {
515       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
516       for ( i=0; i<M; i++ ) { /* for each block in the block row */
517         v_i  = *v + i*bs;
518         aa_i = aa + bs2*(ai[bn] + i);
519         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
520       }
521     }
522   }
523 
524   if (idx) {
525     *idx = 0;
526     if (*nz) {
527       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
528       for ( i=0; i<M; i++ ) { /* for each block in the block row */
529         idx_i = *idx + i*bs;
530         itmp  = bs*aj[ai[bn] + i];
531         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
532       }
533     }
534   }
535   return 0;
536 }
537 
538 #undef __FUNC__
539 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
540 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
541 {
542   if (idx) {if (*idx) PetscFree(*idx);}
543   if (v)   {if (*v)   PetscFree(*v);}
544   return 0;
545 }
546 
547 #undef __FUNC__
548 #define __FUNC__ "MatTranspose_SeqBAIJ"
549 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
550 {
551   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
552   Mat         C;
553   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
554   int         *rows,*cols,bs2=a->bs2;
555   Scalar      *array=a->a;
556 
557   if (B==PETSC_NULL && mbs!=nbs)
558     SETERRQ(1,0,"Square matrix only for in-place");
559   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
560   PetscMemzero(col,(1+nbs)*sizeof(int));
561 
562   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
563   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
564   PetscFree(col);
565   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
566   cols = rows + bs;
567   for ( i=0; i<mbs; i++ ) {
568     cols[0] = i*bs;
569     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
570     len = ai[i+1] - ai[i];
571     for ( j=0; j<len; j++ ) {
572       rows[0] = (*aj++)*bs;
573       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
574       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
575       array += bs2;
576     }
577   }
578   PetscFree(rows);
579 
580   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
581   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
582 
583   if (B != PETSC_NULL) {
584     *B = C;
585   } else {
586     /* This isn't really an in-place transpose */
587     PetscFree(a->a);
588     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
589     if (a->diag) PetscFree(a->diag);
590     if (a->ilen) PetscFree(a->ilen);
591     if (a->imax) PetscFree(a->imax);
592     if (a->solve_work) PetscFree(a->solve_work);
593     PetscFree(a);
594     PetscMemcpy(A,C,sizeof(struct _Mat));
595     PetscHeaderDestroy(C);
596   }
597   return 0;
598 }
599 
600 
601 #undef __FUNC__
602 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
603 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
604 {
605   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
606   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
607   int        m = a->m,*ip, N, *ailen = a->ilen;
608   int        mbs = a->mbs, bs2 = a->bs2,rmax;
609   Scalar     *aa = a->a, *ap;
610 
611   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
612 
613   rmax = ailen[0];
614   for ( i=1; i<mbs; i++ ) {
615     /* move each row back by the amount of empty slots (fshift) before it*/
616     fshift += imax[i-1] - ailen[i-1];
617     rmax   = PetscMax(rmax,ailen[i]);
618     if (fshift) {
619       ip = aj + ai[i]; ap = aa + bs2*ai[i];
620       N = ailen[i];
621       for ( j=0; j<N; j++ ) {
622         ip[j-fshift] = ip[j];
623         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
624       }
625     }
626     ai[i] = ai[i-1] + ailen[i-1];
627   }
628   if (mbs) {
629     fshift += imax[mbs-1] - ailen[mbs-1];
630     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
631   }
632   /* reset ilen and imax for each row */
633   for ( i=0; i<mbs; i++ ) {
634     ailen[i] = imax[i] = ai[i+1] - ai[i];
635   }
636   a->nz = ai[mbs];
637 
638   /* diagonals may have moved, so kill the diagonal pointers */
639   if (fshift && a->diag) {
640     PetscFree(a->diag);
641     PLogObjectMemory(A,-(m+1)*sizeof(int));
642     a->diag = 0;
643   }
644   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
645            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
646   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
647            a->reallocs);
648   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
649   a->reallocs          = 0;
650   A->info.nz_unneeded  = (double)fshift*bs2;
651 
652   return 0;
653 }
654 
655 #undef __FUNC__
656 #define __FUNC__ "MatZeroEntries_SeqBAIJ"
657 static int MatZeroEntries_SeqBAIJ(Mat A)
658 {
659   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
660   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
661   return 0;
662 }
663 
664 #undef __FUNC__
665 #define __FUNC__ "MatDestroy_SeqBAIJ"
666 int MatDestroy_SeqBAIJ(PetscObject obj)
667 {
668   Mat         A  = (Mat) obj;
669   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
670   int         ierr;
671 
672 #if defined(PETSC_LOG)
673   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
674 #endif
675   PetscFree(a->a);
676   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
677   if (a->diag) PetscFree(a->diag);
678   if (a->ilen) PetscFree(a->ilen);
679   if (a->imax) PetscFree(a->imax);
680   if (a->solve_work) PetscFree(a->solve_work);
681   if (a->mult_work) PetscFree(a->mult_work);
682   PetscFree(a);
683   if (A->mapping) {
684     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
685   }
686   PLogObjectDestroy(A);
687   PetscHeaderDestroy(A);
688   return 0;
689 }
690 
691 #undef __FUNC__
692 #define __FUNC__ "MatSetOption_SeqBAIJ"
693 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
694 {
695   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
696   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
697   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
698   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
699   else if (op == MAT_COLUMNS_UNSORTED)          a->sorted      = 0;
700   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
701   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
702   else if (op == MAT_ROWS_SORTED ||
703            op == MAT_ROWS_UNSORTED ||
704            op == MAT_SYMMETRIC ||
705            op == MAT_STRUCTURALLY_SYMMETRIC ||
706            op == MAT_YES_NEW_DIAGONALS ||
707            op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES)
708     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
709   else if (op == MAT_NO_NEW_DIAGONALS)
710     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
711   else
712     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
713   return 0;
714 }
715 
716 
717 /* -------------------------------------------------------*/
718 /* Should check that shapes of vectors and matrices match */
719 /* -------------------------------------------------------*/
720 #include "pinclude/plapack.h"
721 
722 #undef __FUNC__
723 #define __FUNC__ "MatMult_SeqBAIJ_1"
724 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
725 {
726   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
727   register Scalar *x,*z,*v,sum;
728   int             mbs=a->mbs,i,*idx,*ii,n;
729 
730   VecGetArray_Fast(xx,x);
731   VecGetArray_Fast(zz,z);
732 
733   idx   = a->j;
734   v     = a->a;
735   ii    = a->i;
736 
737   for ( i=0; i<mbs; i++ ) {
738     n    = ii[1] - ii[0]; ii++;
739     sum  = 0.0;
740     while (n--) sum += *v++ * x[*idx++];
741     z[i] = sum;
742   }
743   VecRestoreArray_Fast(xx,x);
744   VecRestoreArray_Fast(zz,z);
745   PLogFlops(2*a->nz - a->m);
746   return 0;
747 }
748 
749 #undef __FUNC__
750 #define __FUNC__ "MatMult_SeqBAIJ_2"
751 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
752 {
753   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
754   register Scalar *x,*z,*v,*xb,sum1,sum2;
755   register Scalar x1,x2;
756   int             mbs=a->mbs,i,*idx,*ii;
757   int             j,n;
758 
759   VecGetArray_Fast(xx,x);
760   VecGetArray_Fast(zz,z);
761 
762   idx   = a->j;
763   v     = a->a;
764   ii    = a->i;
765 
766   for ( i=0; i<mbs; i++ ) {
767     n  = ii[1] - ii[0]; ii++;
768     sum1 = 0.0; sum2 = 0.0;
769     for ( j=0; j<n; j++ ) {
770       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
771       sum1 += v[0]*x1 + v[2]*x2;
772       sum2 += v[1]*x1 + v[3]*x2;
773       v += 4;
774     }
775     z[0] = sum1; z[1] = sum2;
776     z += 2;
777   }
778   VecRestoreArray_Fast(xx,x);
779   VecRestoreArray_Fast(zz,z);
780   PLogFlops(4*a->nz - a->m);
781   return 0;
782 }
783 
784 #undef __FUNC__
785 #define __FUNC__ "MatMult_SeqBAIJ_3"
786 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
787 {
788   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
789   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
790   int             mbs=a->mbs,i,*idx,*ii,j,n;
791 
792   VecGetArray_Fast(xx,x);
793   VecGetArray_Fast(zz,z);
794 
795   idx   = a->j;
796   v     = a->a;
797   ii    = a->i;
798 
799   for ( i=0; i<mbs; i++ ) {
800     n  = ii[1] - ii[0]; ii++;
801     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
802     for ( j=0; j<n; j++ ) {
803       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
804       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
805       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
806       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
807       v += 9;
808     }
809     z[0] = sum1; z[1] = sum2; z[2] = sum3;
810     z += 3;
811   }
812   VecRestoreArray_Fast(xx,x);
813   VecRestoreArray_Fast(zz,z);
814   PLogFlops(18*a->nz - a->m);
815   return 0;
816 }
817 
818 #undef __FUNC__
819 #define __FUNC__ "MatMult_SeqBAIJ_4"
820 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
821 {
822   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
823   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
824   register Scalar x1,x2,x3,x4;
825   int             mbs=a->mbs,i,*idx,*ii;
826   int             j,n;
827 
828   VecGetArray_Fast(xx,x);
829   VecGetArray_Fast(zz,z);
830 
831   idx   = a->j;
832   v     = a->a;
833   ii    = a->i;
834 
835   for ( i=0; i<mbs; i++ ) {
836     n  = ii[1] - ii[0]; ii++;
837     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
838     for ( j=0; j<n; j++ ) {
839       xb = x + 4*(*idx++);
840       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
841       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
842       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
843       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
844       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
845       v += 16;
846     }
847     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
848     z += 4;
849   }
850   VecRestoreArray_Fast(xx,x);
851   VecRestoreArray_Fast(zz,z);
852   PLogFlops(32*a->nz - a->m);
853   return 0;
854 }
855 
856 #undef __FUNC__
857 #define __FUNC__ "MatMult_SeqBAIJ_5"
858 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
859 {
860   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
861   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
862   register Scalar x1,x2,x3,x4,x5;
863   int             mbs=a->mbs,i,*idx,*ii,j,n;
864 
865   VecGetArray_Fast(xx,x);
866   VecGetArray_Fast(zz,z);
867 
868   idx   = a->j;
869   v     = a->a;
870   ii    = a->i;
871 
872   for ( i=0; i<mbs; i++ ) {
873     n  = ii[1] - ii[0]; ii++;
874     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
875     for ( j=0; j<n; j++ ) {
876       xb = x + 5*(*idx++);
877       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
878       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
879       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
880       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
881       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
882       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
883       v += 25;
884     }
885     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
886     z += 5;
887   }
888   VecRestoreArray_Fast(xx,x);
889   VecRestoreArray_Fast(zz,z);
890   PLogFlops(50*a->nz - a->m);
891   return 0;
892 }
893 
894 #undef __FUNC__
895 #define __FUNC__ "MatMult_SeqBAIJ_N"
896 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
897 {
898   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
899   register Scalar *x,*z,*v,*xb;
900   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
901   int             _One = 1,ncols,k;
902   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
903 
904   VecGetArray_Fast(xx,x);
905   VecGetArray_Fast(zz,z);
906 
907   idx   = a->j;
908   v     = a->a;
909   ii    = a->i;
910 
911 
912   if (!a->mult_work) {
913     k = PetscMax(a->m,a->n);
914     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
915   }
916   work = a->mult_work;
917   for ( i=0; i<mbs; i++ ) {
918     n     = ii[1] - ii[0]; ii++;
919     ncols = n*bs;
920     workt = work;
921     for ( j=0; j<n; j++ ) {
922       xb = x + bs*(*idx++);
923       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
924       workt += bs;
925     }
926     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
927     v += n*bs2;
928     z += bs;
929   }
930   VecRestoreArray_Fast(xx,x);
931   VecRestoreArray_Fast(zz,z);
932   PLogFlops(2*a->nz*bs2 - a->m);
933   return 0;
934 }
935 
936 #undef __FUNC__
937 #define __FUNC__ "MatMultAdd_SeqBAIJ_1"
938 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
939 {
940   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
941   register Scalar *x,*y,*z,*v,sum;
942   int             mbs=a->mbs,i,*idx,*ii,n;
943 
944   VecGetArray_Fast(xx,x);
945   VecGetArray_Fast(yy,y);
946   VecGetArray_Fast(zz,z);
947 
948   idx   = a->j;
949   v     = a->a;
950   ii    = a->i;
951 
952   for ( i=0; i<mbs; i++ ) {
953     n    = ii[1] - ii[0]; ii++;
954     sum  = y[i];
955     while (n--) sum += *v++ * x[*idx++];
956     z[i] = sum;
957   }
958   VecRestoreArray_Fast(xx,x);
959   VecRestoreArray_Fast(yy,y);
960   VecRestoreArray_Fast(zz,z);
961   PLogFlops(2*a->nz);
962   return 0;
963 }
964 
965 #undef __FUNC__
966 #define __FUNC__ "MatMultAdd_SeqBAIJ_2"
967 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
968 {
969   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
970   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
971   register Scalar x1,x2;
972   int             mbs=a->mbs,i,*idx,*ii;
973   int             j,n;
974 
975   VecGetArray_Fast(xx,x);
976   VecGetArray_Fast(yy,y);
977   VecGetArray_Fast(zz,z);
978 
979   idx   = a->j;
980   v     = a->a;
981   ii    = a->i;
982 
983   for ( i=0; i<mbs; i++ ) {
984     n  = ii[1] - ii[0]; ii++;
985     sum1 = y[0]; sum2 = y[1];
986     for ( j=0; j<n; j++ ) {
987       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
988       sum1 += v[0]*x1 + v[2]*x2;
989       sum2 += v[1]*x1 + v[3]*x2;
990       v += 4;
991     }
992     z[0] = sum1; z[1] = sum2;
993     z += 2; y += 2;
994   }
995   VecRestoreArray_Fast(xx,x);
996   VecRestoreArray_Fast(yy,y);
997   VecRestoreArray_Fast(zz,z);
998   PLogFlops(4*a->nz);
999   return 0;
1000 }
1001 
1002 #undef __FUNC__
1003 #define __FUNC__ "MatMultAdd_SeqBAIJ_3"
1004 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1005 {
1006   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1007   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1008   int             mbs=a->mbs,i,*idx,*ii,j,n;
1009 
1010   VecGetArray_Fast(xx,x);
1011   VecGetArray_Fast(yy,y);
1012   VecGetArray_Fast(zz,z);
1013 
1014   idx   = a->j;
1015   v     = a->a;
1016   ii    = a->i;
1017 
1018   for ( i=0; i<mbs; i++ ) {
1019     n  = ii[1] - ii[0]; ii++;
1020     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1021     for ( j=0; j<n; j++ ) {
1022       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1023       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1024       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1025       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1026       v += 9;
1027     }
1028     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1029     z += 3; y += 3;
1030   }
1031   VecRestoreArray_Fast(xx,x);
1032   VecRestoreArray_Fast(yy,y);
1033   VecRestoreArray_Fast(zz,z);
1034   PLogFlops(18*a->nz);
1035   return 0;
1036 }
1037 
1038 #undef __FUNC__
1039 #define __FUNC__ "MatMultAdd_SeqBAIJ_4"
1040 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1041 {
1042   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1043   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
1044   register Scalar x1,x2,x3,x4;
1045   int             mbs=a->mbs,i,*idx,*ii;
1046   int             j,n;
1047 
1048   VecGetArray_Fast(xx,x);
1049   VecGetArray_Fast(yy,y);
1050   VecGetArray_Fast(zz,z);
1051 
1052   idx   = a->j;
1053   v     = a->a;
1054   ii    = a->i;
1055 
1056   for ( i=0; i<mbs; i++ ) {
1057     n  = ii[1] - ii[0]; ii++;
1058     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1059     for ( j=0; j<n; j++ ) {
1060       xb = x + 4*(*idx++);
1061       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1062       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1063       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1064       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1065       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1066       v += 16;
1067     }
1068     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1069     z += 4; y += 4;
1070   }
1071   VecRestoreArray_Fast(xx,x);
1072   VecRestoreArray_Fast(yy,y);
1073   VecRestoreArray_Fast(zz,z);
1074   PLogFlops(32*a->nz);
1075   return 0;
1076 }
1077 
1078 #undef __FUNC__
1079 #define __FUNC__ "MatMultAdd_SeqBAIJ_5"
1080 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1081 {
1082   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1083   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1084   register Scalar x1,x2,x3,x4,x5;
1085   int             mbs=a->mbs,i,*idx,*ii,j,n;
1086 
1087   VecGetArray_Fast(xx,x);
1088   VecGetArray_Fast(yy,y);
1089   VecGetArray_Fast(zz,z);
1090 
1091   idx   = a->j;
1092   v     = a->a;
1093   ii    = a->i;
1094 
1095   for ( i=0; i<mbs; i++ ) {
1096     n  = ii[1] - ii[0]; ii++;
1097     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1098     for ( j=0; j<n; j++ ) {
1099       xb = x + 5*(*idx++);
1100       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1101       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1102       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1103       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1104       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1105       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1106       v += 25;
1107     }
1108     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1109     z += 5; y += 5;
1110   }
1111   VecRestoreArray_Fast(xx,x);
1112   VecRestoreArray_Fast(yy,y);
1113   VecRestoreArray_Fast(zz,z);
1114   PLogFlops(50*a->nz);
1115   return 0;
1116 }
1117 
1118 #undef __FUNC__
1119 #define __FUNC__ "MatMultAdd_SeqBAIJ_N"
1120 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1121 {
1122   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1123   register Scalar *x,*z,*v,*xb;
1124   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1125   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1126 
1127   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1128 
1129   VecGetArray_Fast(xx,x);
1130   VecGetArray_Fast(zz,z);
1131 
1132   idx   = a->j;
1133   v     = a->a;
1134   ii    = a->i;
1135 
1136 
1137   if (!a->mult_work) {
1138     k = PetscMax(a->m,a->n);
1139     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1140   }
1141   work = a->mult_work;
1142   for ( i=0; i<mbs; i++ ) {
1143     n     = ii[1] - ii[0]; ii++;
1144     ncols = n*bs;
1145     workt = work;
1146     for ( j=0; j<n; j++ ) {
1147       xb = x + bs*(*idx++);
1148       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1149       workt += bs;
1150     }
1151     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1152     v += n*bs2;
1153     z += bs;
1154   }
1155   VecRestoreArray_Fast(xx,x);
1156   VecRestoreArray_Fast(zz,z);
1157   PLogFlops(2*a->nz*bs2 );
1158   return 0;
1159 }
1160 
1161 #undef __FUNC__
1162 #define __FUNC__ "MatMultTrans_SeqBAIJ"
1163 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1164 {
1165   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1166   Scalar          *xg,*zg,*zb;
1167   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1168   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1169   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1170 
1171 
1172   VecGetArray_Fast(xx,xg); x = xg;
1173   VecGetArray_Fast(zz,zg); z = zg;
1174   PetscMemzero(z,N*sizeof(Scalar));
1175 
1176   idx   = a->j;
1177   v     = a->a;
1178   ii    = a->i;
1179 
1180   switch (bs) {
1181   case 1:
1182     for ( i=0; i<mbs; i++ ) {
1183       n  = ii[1] - ii[0]; ii++;
1184       xb = x + i; x1 = xb[0];
1185       ib = idx + ai[i];
1186       for ( j=0; j<n; j++ ) {
1187         rval    = ib[j];
1188         z[rval] += *v++ * x1;
1189       }
1190     }
1191     break;
1192   case 2:
1193     for ( i=0; i<mbs; i++ ) {
1194       n  = ii[1] - ii[0]; ii++;
1195       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1196       ib = idx + ai[i];
1197       for ( j=0; j<n; j++ ) {
1198         rval      = ib[j]*2;
1199         z[rval++] += v[0]*x1 + v[1]*x2;
1200         z[rval++] += v[2]*x1 + v[3]*x2;
1201         v += 4;
1202       }
1203     }
1204     break;
1205   case 3:
1206     for ( i=0; i<mbs; i++ ) {
1207       n  = ii[1] - ii[0]; ii++;
1208       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1209       ib = idx + ai[i];
1210       for ( j=0; j<n; j++ ) {
1211         rval      = ib[j]*3;
1212         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1213         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1214         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1215         v += 9;
1216       }
1217     }
1218     break;
1219   case 4:
1220     for ( i=0; i<mbs; i++ ) {
1221       n  = ii[1] - ii[0]; ii++;
1222       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1223       ib = idx + ai[i];
1224       for ( j=0; j<n; j++ ) {
1225         rval      = ib[j]*4;
1226         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1227         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1228         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1229         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1230         v += 16;
1231       }
1232     }
1233     break;
1234   case 5:
1235     for ( i=0; i<mbs; i++ ) {
1236       n  = ii[1] - ii[0]; ii++;
1237       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1238       x4 = xb[3];   x5 = xb[4];
1239       ib = idx + ai[i];
1240       for ( j=0; j<n; j++ ) {
1241         rval      = ib[j]*5;
1242         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1243         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1244         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1245         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1246         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1247         v += 25;
1248       }
1249     }
1250     break;
1251       /* block sizes larger then 5 by 5 are handled by BLAS */
1252     default: {
1253       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1254       if (!a->mult_work) {
1255         k = PetscMax(a->m,a->n);
1256         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1257         CHKPTRQ(a->mult_work);
1258       }
1259       work = a->mult_work;
1260       for ( i=0; i<mbs; i++ ) {
1261         n     = ii[1] - ii[0]; ii++;
1262         ncols = n*bs;
1263         PetscMemzero(work,ncols*sizeof(Scalar));
1264         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1265         v += n*bs2;
1266         x += bs;
1267         workt = work;
1268         for ( j=0; j<n; j++ ) {
1269           zb = z + bs*(*idx++);
1270           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1271           workt += bs;
1272         }
1273       }
1274     }
1275   }
1276   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1277   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1278   PLogFlops(2*a->nz*a->bs2 - a->n);
1279   return 0;
1280 }
1281 
1282 #undef __FUNC__
1283 #define __FUNC__ "MatMultTransAdd_SeqBAIJ"
1284 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1285 {
1286   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1287   Scalar          *xg,*zg,*zb;
1288   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1289   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1290   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1291 
1292 
1293 
1294   VecGetArray_Fast(xx,xg); x = xg;
1295   VecGetArray_Fast(zz,zg); z = zg;
1296 
1297   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1298   else PetscMemzero(z,N*sizeof(Scalar));
1299 
1300 
1301   idx   = a->j;
1302   v     = a->a;
1303   ii    = a->i;
1304 
1305   switch (bs) {
1306   case 1:
1307     for ( i=0; i<mbs; i++ ) {
1308       n  = ii[1] - ii[0]; ii++;
1309       xb = x + i; x1 = xb[0];
1310       ib = idx + ai[i];
1311       for ( j=0; j<n; j++ ) {
1312         rval    = ib[j];
1313         z[rval] += *v++ * x1;
1314       }
1315     }
1316     break;
1317   case 2:
1318     for ( i=0; i<mbs; i++ ) {
1319       n  = ii[1] - ii[0]; ii++;
1320       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1321       ib = idx + ai[i];
1322       for ( j=0; j<n; j++ ) {
1323         rval      = ib[j]*2;
1324         z[rval++] += v[0]*x1 + v[1]*x2;
1325         z[rval++] += v[2]*x1 + v[3]*x2;
1326         v += 4;
1327       }
1328     }
1329     break;
1330   case 3:
1331     for ( i=0; i<mbs; i++ ) {
1332       n  = ii[1] - ii[0]; ii++;
1333       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1334       ib = idx + ai[i];
1335       for ( j=0; j<n; j++ ) {
1336         rval      = ib[j]*3;
1337         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1338         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1339         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1340         v += 9;
1341       }
1342     }
1343     break;
1344   case 4:
1345     for ( i=0; i<mbs; i++ ) {
1346       n  = ii[1] - ii[0]; ii++;
1347       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1348       ib = idx + ai[i];
1349       for ( j=0; j<n; j++ ) {
1350         rval      = ib[j]*4;
1351         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1352         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1353         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1354         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1355         v += 16;
1356       }
1357     }
1358     break;
1359   case 5:
1360     for ( i=0; i<mbs; i++ ) {
1361       n  = ii[1] - ii[0]; ii++;
1362       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1363       x4 = xb[3];   x5 = xb[4];
1364       ib = idx + ai[i];
1365       for ( j=0; j<n; j++ ) {
1366         rval      = ib[j]*5;
1367         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1368         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1369         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1370         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1371         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1372         v += 25;
1373       }
1374     }
1375     break;
1376       /* block sizes larger then 5 by 5 are handled by BLAS */
1377     default: {
1378       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1379       if (!a->mult_work) {
1380         k = PetscMax(a->m,a->n);
1381         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1382         CHKPTRQ(a->mult_work);
1383       }
1384       work = a->mult_work;
1385       for ( i=0; i<mbs; i++ ) {
1386         n     = ii[1] - ii[0]; ii++;
1387         ncols = n*bs;
1388         PetscMemzero(work,ncols*sizeof(Scalar));
1389         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1390         v += n*bs2;
1391         x += bs;
1392         workt = work;
1393         for ( j=0; j<n; j++ ) {
1394           zb = z + bs*(*idx++);
1395           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1396           workt += bs;
1397         }
1398       }
1399     }
1400   }
1401   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1402   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1403   PLogFlops(2*a->nz*a->bs2);
1404   return 0;
1405 }
1406 
1407 #undef __FUNC__
1408 #define __FUNC__ "MatGetInfo_SeqBAIJ"
1409 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1410 {
1411   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1412 
1413   info->rows_global    = (double)a->m;
1414   info->columns_global = (double)a->n;
1415   info->rows_local     = (double)a->m;
1416   info->columns_local  = (double)a->n;
1417   info->block_size     = a->bs2;
1418   info->nz_allocated   = a->maxnz;
1419   info->nz_used        = a->bs2*a->nz;
1420   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1421   /*  if (info->nz_unneeded != A->info.nz_unneeded)
1422     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
1423   info->assemblies   = A->num_ass;
1424   info->mallocs      = a->reallocs;
1425   info->memory       = A->mem;
1426   if (A->factor) {
1427     info->fill_ratio_given  = A->info.fill_ratio_given;
1428     info->fill_ratio_needed = A->info.fill_ratio_needed;
1429     info->factor_mallocs    = A->info.factor_mallocs;
1430   } else {
1431     info->fill_ratio_given  = 0;
1432     info->fill_ratio_needed = 0;
1433     info->factor_mallocs    = 0;
1434   }
1435   return 0;
1436 }
1437 
1438 #undef __FUNC__
1439 #define __FUNC__ "MatEqual_SeqBAIJ"
1440 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1441 {
1442   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1443 
1444   if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type");
1445 
1446   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1447   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1448       (a->nz != b->nz)) {
1449     *flg = PETSC_FALSE; return 0;
1450   }
1451 
1452   /* if the a->i are the same */
1453   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1454     *flg = PETSC_FALSE; return 0;
1455   }
1456 
1457   /* if a->j are the same */
1458   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1459     *flg = PETSC_FALSE; return 0;
1460   }
1461 
1462   /* if a->a are the same */
1463   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1464     *flg = PETSC_FALSE; return 0;
1465   }
1466   *flg = PETSC_TRUE;
1467   return 0;
1468 
1469 }
1470 
1471 #undef __FUNC__
1472 #define __FUNC__ "MatGetDiagonal_SeqBAIJ"
1473 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1474 {
1475   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1476   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1477   Scalar      *x, zero = 0.0,*aa,*aa_j;
1478 
1479   if (A->factor) SETERRQ(1,0,"Not for factored matrix");
1480   bs   = a->bs;
1481   aa   = a->a;
1482   ai   = a->i;
1483   aj   = a->j;
1484   ambs = a->mbs;
1485   bs2  = a->bs2;
1486 
1487   VecSet(&zero,v);
1488   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1489   if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector");
1490   for ( i=0; i<ambs; i++ ) {
1491     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1492       if (aj[j] == i) {
1493         row  = i*bs;
1494         aa_j = aa+j*bs2;
1495         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1496         break;
1497       }
1498     }
1499   }
1500   return 0;
1501 }
1502 
1503 #undef __FUNC__
1504 #define __FUNC__ "MatDiagonalScale_SeqBAIJ"
1505 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1506 {
1507   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1508   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1509   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1510 
1511   ai  = a->i;
1512   aj  = a->j;
1513   aa  = a->a;
1514   m   = a->m;
1515   n   = a->n;
1516   bs  = a->bs;
1517   mbs = a->mbs;
1518   bs2 = a->bs2;
1519   if (ll) {
1520     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1521     if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length");
1522     for ( i=0; i<mbs; i++ ) { /* for each block row */
1523       M  = ai[i+1] - ai[i];
1524       li = l + i*bs;
1525       v  = aa + bs2*ai[i];
1526       for ( j=0; j<M; j++ ) { /* for each block */
1527         for ( k=0; k<bs2; k++ ) {
1528           (*v++) *= li[k%bs];
1529         }
1530       }
1531     }
1532   }
1533 
1534   if (rr) {
1535     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1536     if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length");
1537     for ( i=0; i<mbs; i++ ) { /* for each block row */
1538       M  = ai[i+1] - ai[i];
1539       v  = aa + bs2*ai[i];
1540       for ( j=0; j<M; j++ ) { /* for each block */
1541         ri = r + bs*aj[ai[i]+j];
1542         for ( k=0; k<bs; k++ ) {
1543           x = ri[k];
1544           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1545         }
1546       }
1547     }
1548   }
1549   return 0;
1550 }
1551 
1552 
1553 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1554 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1555 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1556 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1557 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1558 
1559 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1560 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1561 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1562 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1563 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1564 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1565 
1566 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1567 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1568 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1569 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1570 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1571 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1572 
1573 #undef __FUNC__
1574 #define __FUNC__ "MatNorm_SeqBAIJ"
1575 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1576 {
1577   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1578   Scalar      *v = a->a;
1579   double      sum = 0.0;
1580   int         i,nz=a->nz,bs2=a->bs2;
1581 
1582   if (type == NORM_FROBENIUS) {
1583     for (i=0; i< bs2*nz; i++ ) {
1584 #if defined(PETSC_COMPLEX)
1585       sum += real(conj(*v)*(*v)); v++;
1586 #else
1587       sum += (*v)*(*v); v++;
1588 #endif
1589     }
1590     *norm = sqrt(sum);
1591   }
1592   else {
1593     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
1594   }
1595   return 0;
1596 }
1597 
1598 /*
1599      note: This can only work for identity for row and col. It would
1600    be good to check this and otherwise generate an error.
1601 */
1602 #undef __FUNC__
1603 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1604 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1605 {
1606   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1607   Mat         outA;
1608   int         ierr;
1609 
1610   if (fill != 0) SETERRQ(1,0,"Only fill=0 supported");
1611 
1612   outA          = inA;
1613   inA->factor   = FACTOR_LU;
1614   a->row        = row;
1615   a->col        = col;
1616 
1617   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1618 
1619   if (!a->diag) {
1620     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1621   }
1622   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1623   return 0;
1624 }
1625 
1626 #undef __FUNC__
1627 #define __FUNC__ "MatScale_SeqBAIJ"
1628 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1629 {
1630   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1631   int         one = 1, totalnz = a->bs2*a->nz;
1632   BLscal_( &totalnz, alpha, a->a, &one );
1633   PLogFlops(totalnz);
1634   return 0;
1635 }
1636 
1637 #undef __FUNC__
1638 #define __FUNC__ "MatGetValues_SeqBAIJ"
1639 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1640 {
1641   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1642   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1643   int        *ai = a->i, *ailen = a->ilen;
1644   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1645   Scalar     *ap, *aa = a->a, zero = 0.0;
1646 
1647   for ( k=0; k<m; k++ ) { /* loop over rows */
1648     row  = im[k]; brow = row/bs;
1649     if (row < 0) SETERRQ(1,0,"Negative row");
1650     if (row >= a->m) SETERRQ(1,0,"Row too large");
1651     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1652     nrow = ailen[brow];
1653     for ( l=0; l<n; l++ ) { /* loop over columns */
1654       if (in[l] < 0) SETERRQ(1,0,"Negative column");
1655       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
1656       col  = in[l] ;
1657       bcol = col/bs;
1658       cidx = col%bs;
1659       ridx = row%bs;
1660       high = nrow;
1661       low  = 0; /* assume unsorted */
1662       while (high-low > 5) {
1663         t = (low+high)/2;
1664         if (rp[t] > bcol) high = t;
1665         else             low  = t;
1666       }
1667       for ( i=low; i<high; i++ ) {
1668         if (rp[i] > bcol) break;
1669         if (rp[i] == bcol) {
1670           *v++ = ap[bs2*i+bs*cidx+ridx];
1671           goto finished;
1672         }
1673       }
1674       *v++ = zero;
1675       finished:;
1676     }
1677   }
1678   return 0;
1679 }
1680 
1681 #undef __FUNC__
1682 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
1683 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1684 {
1685   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1686   *bs = baij->bs;
1687   return 0;
1688 }
1689 
1690 /* idx should be of length atlease bs */
1691 #undef __FUNC__
1692 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
1693 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1694 {
1695   int i,row;
1696   row = idx[0];
1697   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1698 
1699   for ( i=1; i<bs; i++ ) {
1700     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1701   }
1702   *flg = PETSC_TRUE;
1703   return 0;
1704 }
1705 
1706 #undef __FUNC__
1707 #define __FUNC__ "MatZeroRows_SeqBAIJ"
1708 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1709 {
1710   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1711   IS          is_local;
1712   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1713   PetscTruth  flg;
1714   Scalar      zero = 0.0,*aa;
1715 
1716   /* Make a copy of the IS and  sort it */
1717   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1718   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1719   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1720   ierr = ISSort(is_local); CHKERRQ(ierr);
1721   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1722 
1723   i = 0;
1724   while (i < is_n) {
1725     if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range");
1726     flg = PETSC_FALSE;
1727     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1728     if (flg) { /* There exists a block of rows to be Zerowed */
1729       baij->ilen[rows[i]/bs] = 0;
1730       i += bs;
1731     } else { /* Zero out only the requested row */
1732       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1733       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1734       for ( j=0; j<count; j++ ) {
1735         aa[0] = zero;
1736         aa+=bs;
1737       }
1738       i++;
1739     }
1740   }
1741   if (diag) {
1742     for ( j=0; j<is_n; j++ ) {
1743       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1744     }
1745   }
1746   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1747   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1748   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1749   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1750 
1751   return 0;
1752 }
1753 
1754 #undef __FUNC__
1755 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1756 int MatPrintHelp_SeqBAIJ(Mat A)
1757 {
1758   static int called = 0;
1759   MPI_Comm   comm = A->comm;
1760 
1761   if (called) return 0; else called = 1;
1762   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1763   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1764   return 0;
1765 }
1766 
1767 /* -------------------------------------------------------------------*/
1768 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1769        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1770        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1771        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1772        MatSolve_SeqBAIJ_N,0,
1773        0,0,
1774        MatLUFactor_SeqBAIJ,0,
1775        0,
1776        MatTranspose_SeqBAIJ,
1777        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1778        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1779        0,MatAssemblyEnd_SeqBAIJ,
1780        0,
1781        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
1782        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1783        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1784        MatILUFactorSymbolic_SeqBAIJ,0,
1785        0,0,
1786        MatConvertSameType_SeqBAIJ,0,0,
1787        MatILUFactor_SeqBAIJ,0,0,
1788        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
1789        MatGetValues_SeqBAIJ,0,
1790        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1791        0,0,0,MatGetBlockSize_SeqBAIJ,
1792        MatGetRowIJ_SeqBAIJ,
1793        MatRestoreRowIJ_SeqBAIJ};
1794 
1795 #undef __FUNC__
1796 #define __FUNC__ "MatCreateSeqBAIJ"
1797 /*@C
1798    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1799    compressed row) format.  For good matrix assembly performance the
1800    user should preallocate the matrix storage by setting the parameter nz
1801    (or the array nzz).  By setting these parameters accurately, performance
1802    during matrix assembly can be increased by more than a factor of 50.
1803 
1804    Input Parameters:
1805 .  comm - MPI communicator, set to MPI_COMM_SELF
1806 .  bs - size of block
1807 .  m - number of rows
1808 .  n - number of columns
1809 .  nz - number of block nonzeros per block row (same for all rows)
1810 .  nzz - array containing the number of block nonzeros in the various block rows
1811          (possibly different for each block row) or PETSC_NULL
1812 
1813    Output Parameter:
1814 .  A - the matrix
1815 
1816    Options Database Keys:
1817 $    -mat_no_unroll - uses code that does not unroll the loops in the
1818 $                     block calculations (much solver)
1819 $    -mat_block_size - size of the blocks to use
1820 
1821    Notes:
1822    The block AIJ format is fully compatible with standard Fortran 77
1823    storage.  That is, the stored row and column indices can begin at
1824    either one (as in Fortran) or zero.  See the users' manual for details.
1825 
1826    Specify the preallocated storage with either nz or nnz (not both).
1827    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1828    allocation.  For additional details, see the users manual chapter on
1829    matrices.
1830 
1831 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1832 @*/
1833 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1834 {
1835   Mat         B;
1836   Mat_SeqBAIJ *b;
1837   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
1838 
1839   MPI_Comm_size(comm,&size);
1840   if (size > 1) SETERRQ(1,0,"Comm must be of size 1");
1841 
1842   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1843 
1844   if (mbs*bs!=m || nbs*bs!=n)
1845     SETERRQ(1,0,"Number rows, cols must be divisible by blocksize");
1846 
1847   *A = 0;
1848   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1849   PLogObjectCreate(B);
1850   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1851   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1852   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1853   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1854   if (!flg) {
1855     switch (bs) {
1856       case 1:
1857         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1858         B->ops.solve           = MatSolve_SeqBAIJ_1;
1859         B->ops.mult            = MatMult_SeqBAIJ_1;
1860         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
1861        break;
1862       case 2:
1863         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1864         B->ops.solve           = MatSolve_SeqBAIJ_2;
1865         B->ops.mult            = MatMult_SeqBAIJ_2;
1866         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
1867         break;
1868       case 3:
1869         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1870         B->ops.solve           = MatSolve_SeqBAIJ_3;
1871         B->ops.mult            = MatMult_SeqBAIJ_3;
1872         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
1873         break;
1874       case 4:
1875         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1876         B->ops.solve           = MatSolve_SeqBAIJ_4;
1877         B->ops.mult            = MatMult_SeqBAIJ_4;
1878         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
1879         break;
1880       case 5:
1881         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1882         B->ops.solve           = MatSolve_SeqBAIJ_5;
1883         B->ops.mult            = MatMult_SeqBAIJ_5;
1884         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
1885         break;
1886     }
1887   }
1888   B->destroy          = MatDestroy_SeqBAIJ;
1889   B->view             = MatView_SeqBAIJ;
1890   B->factor           = 0;
1891   B->lupivotthreshold = 1.0;
1892   B->mapping          = 0;
1893   b->row              = 0;
1894   b->col              = 0;
1895   b->reallocs         = 0;
1896 
1897   b->m       = m; B->m = m; B->M = m;
1898   b->n       = n; B->n = n; B->N = n;
1899   b->mbs     = mbs;
1900   b->nbs     = nbs;
1901   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1902   if (nnz == PETSC_NULL) {
1903     if (nz == PETSC_DEFAULT) nz = 5;
1904     else if (nz <= 0)        nz = 1;
1905     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1906     nz = nz*mbs;
1907   }
1908   else {
1909     nz = 0;
1910     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1911   }
1912 
1913   /* allocate the matrix space */
1914   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1915   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1916   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1917   b->j  = (int *) (b->a + nz*bs2);
1918   PetscMemzero(b->j,nz*sizeof(int));
1919   b->i  = b->j + nz;
1920   b->singlemalloc = 1;
1921 
1922   b->i[0] = 0;
1923   for (i=1; i<mbs+1; i++) {
1924     b->i[i] = b->i[i-1] + b->imax[i-1];
1925   }
1926 
1927   /* b->ilen will count nonzeros in each block row so far. */
1928   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1929   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1930   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1931 
1932   b->bs               = bs;
1933   b->bs2              = bs2;
1934   b->mbs              = mbs;
1935   b->nz               = 0;
1936   b->maxnz            = nz*bs2;
1937   b->sorted           = 0;
1938   b->roworiented      = 1;
1939   b->nonew            = 0;
1940   b->diag             = 0;
1941   b->solve_work       = 0;
1942   b->mult_work        = 0;
1943   b->spptr            = 0;
1944   B->info.nz_unneeded = (double)b->maxnz;
1945 
1946   *A = B;
1947   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1948   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1949   return 0;
1950 }
1951 
1952 #undef __FUNC__
1953 #define __FUNC__ "MatConvertSameType_SeqBAIJ"
1954 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1955 {
1956   Mat         C;
1957   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1958   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1959 
1960   if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix");
1961 
1962   *B = 0;
1963   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1964   PLogObjectCreate(C);
1965   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1966   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1967   C->destroy    = MatDestroy_SeqBAIJ;
1968   C->view       = MatView_SeqBAIJ;
1969   C->factor     = A->factor;
1970   c->row        = 0;
1971   c->col        = 0;
1972   C->assembled  = PETSC_TRUE;
1973 
1974   c->m = C->m   = a->m;
1975   c->n = C->n   = a->n;
1976   C->M          = a->m;
1977   C->N          = a->n;
1978 
1979   c->bs         = a->bs;
1980   c->bs2        = a->bs2;
1981   c->mbs        = a->mbs;
1982   c->nbs        = a->nbs;
1983 
1984   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1985   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1986   for ( i=0; i<mbs; i++ ) {
1987     c->imax[i] = a->imax[i];
1988     c->ilen[i] = a->ilen[i];
1989   }
1990 
1991   /* allocate the matrix space */
1992   c->singlemalloc = 1;
1993   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1994   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1995   c->j  = (int *) (c->a + nz*bs2);
1996   c->i  = c->j + nz;
1997   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1998   if (mbs > 0) {
1999     PetscMemcpy(c->j,a->j,nz*sizeof(int));
2000     if (cpvalues == COPY_VALUES) {
2001       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
2002     }
2003   }
2004 
2005   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
2006   c->sorted      = a->sorted;
2007   c->roworiented = a->roworiented;
2008   c->nonew       = a->nonew;
2009 
2010   if (a->diag) {
2011     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2012     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2013     for ( i=0; i<mbs; i++ ) {
2014       c->diag[i] = a->diag[i];
2015     }
2016   }
2017   else c->diag          = 0;
2018   c->nz                 = a->nz;
2019   c->maxnz              = a->maxnz;
2020   c->solve_work         = 0;
2021   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
2022   c->mult_work          = 0;
2023   *B = C;
2024   return 0;
2025 }
2026 
2027 #undef __FUNC__
2028 #define __FUNC__ "MatLoad_SeqBAIJ"
2029 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
2030 {
2031   Mat_SeqBAIJ  *a;
2032   Mat          B;
2033   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2034   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2035   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2036   int          *masked, nmask,tmp,bs2,ishift;
2037   Scalar       *aa;
2038   MPI_Comm     comm = ((PetscObject) viewer)->comm;
2039 
2040   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2041   bs2  = bs*bs;
2042 
2043   MPI_Comm_size(comm,&size);
2044   if (size > 1) SETERRQ(1,0,"view must have one processor");
2045   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2046   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
2047   if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object");
2048   M = header[1]; N = header[2]; nz = header[3];
2049 
2050   if (M != N) SETERRQ(1,0,"Can only do square matrices");
2051 
2052   /*
2053      This code adds extra rows to make sure the number of rows is
2054     divisible by the blocksize
2055   */
2056   mbs        = M/bs;
2057   extra_rows = bs - M + bs*(mbs);
2058   if (extra_rows == bs) extra_rows = 0;
2059   else                  mbs++;
2060   if (extra_rows) {
2061     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2062   }
2063 
2064   /* read in row lengths */
2065   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2066   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
2067   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2068 
2069   /* read in column indices */
2070   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
2071   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
2072   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2073 
2074   /* loop over row lengths determining block row lengths */
2075   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2076   PetscMemzero(browlengths,mbs*sizeof(int));
2077   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
2078   PetscMemzero(mask,mbs*sizeof(int));
2079   masked = mask + mbs;
2080   rowcount = 0; nzcount = 0;
2081   for ( i=0; i<mbs; i++ ) {
2082     nmask = 0;
2083     for ( j=0; j<bs; j++ ) {
2084       kmax = rowlengths[rowcount];
2085       for ( k=0; k<kmax; k++ ) {
2086         tmp = jj[nzcount++]/bs;
2087         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2088       }
2089       rowcount++;
2090     }
2091     browlengths[i] += nmask;
2092     /* zero out the mask elements we set */
2093     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2094   }
2095 
2096   /* create our matrix */
2097   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
2098          CHKERRQ(ierr);
2099   B = *A;
2100   a = (Mat_SeqBAIJ *) B->data;
2101 
2102   /* set matrix "i" values */
2103   a->i[0] = 0;
2104   for ( i=1; i<= mbs; i++ ) {
2105     a->i[i]      = a->i[i-1] + browlengths[i-1];
2106     a->ilen[i-1] = browlengths[i-1];
2107   }
2108   a->nz         = 0;
2109   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
2110 
2111   /* read in nonzero values */
2112   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
2113   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
2114   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2115 
2116   /* set "a" and "j" values into matrix */
2117   nzcount = 0; jcount = 0;
2118   for ( i=0; i<mbs; i++ ) {
2119     nzcountb = nzcount;
2120     nmask    = 0;
2121     for ( j=0; j<bs; j++ ) {
2122       kmax = rowlengths[i*bs+j];
2123       for ( k=0; k<kmax; k++ ) {
2124         tmp = jj[nzcount++]/bs;
2125 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2126       }
2127       rowcount++;
2128     }
2129     /* sort the masked values */
2130     PetscSortInt(nmask,masked);
2131 
2132     /* set "j" values into matrix */
2133     maskcount = 1;
2134     for ( j=0; j<nmask; j++ ) {
2135       a->j[jcount++]  = masked[j];
2136       mask[masked[j]] = maskcount++;
2137     }
2138     /* set "a" values into matrix */
2139     ishift = bs2*a->i[i];
2140     for ( j=0; j<bs; j++ ) {
2141       kmax = rowlengths[i*bs+j];
2142       for ( k=0; k<kmax; k++ ) {
2143         tmp    = jj[nzcountb]/bs ;
2144         block  = mask[tmp] - 1;
2145         point  = jj[nzcountb] - bs*tmp;
2146         idx    = ishift + bs2*block + j + bs*point;
2147         a->a[idx] = aa[nzcountb++];
2148       }
2149     }
2150     /* zero out the mask elements we set */
2151     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2152   }
2153   if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix");
2154 
2155   PetscFree(rowlengths);
2156   PetscFree(browlengths);
2157   PetscFree(aa);
2158   PetscFree(jj);
2159   PetscFree(mask);
2160 
2161   B->assembled = PETSC_TRUE;
2162 
2163   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2164   if (flg) {
2165     Viewer tviewer;
2166     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2167     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
2168     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2169     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2170   }
2171   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2172   if (flg) {
2173     Viewer tviewer;
2174     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2175     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
2176     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2177     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2178   }
2179   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2180   if (flg) {
2181     Viewer tviewer;
2182     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2183     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2184     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2185   }
2186   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2187   if (flg) {
2188     Viewer tviewer;
2189     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2190     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
2191     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2192     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2193   }
2194   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2195   if (flg) {
2196     Viewer tviewer;
2197     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
2198     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
2199     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
2200     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2201   }
2202   return 0;
2203 }
2204 
2205 
2206 
2207