xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 0513a670233e070c0fc2d7ef3dc22f950deb2ce6)
1 #ifndef lint
2 static char vcid[] = "$Id: baij.c,v 1.82 1997/01/06 20:25:20 balay Exp bsmith $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "src/mat/impls/baij/seq/baij.h"
10 #include "src/vec/vecimpl.h"
11 #include "src/inline/spops.h"
12 #include "petsc.h"
13 
14 
15 /*
16      Adds diagonal pointers to sparse matrix structure.
17 */
18 
19 #undef __FUNC__
20 #define __FUNC__ "MatMarkDiag_SeqBAIJ"
21 int MatMarkDiag_SeqBAIJ(Mat A)
22 {
23   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
24   int         i,j, *diag, m = a->mbs;
25 
26   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
27   PLogObjectMemory(A,(m+1)*sizeof(int));
28   for ( i=0; i<m; i++ ) {
29     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
30       if (a->j[j] == i) {
31         diag[i] = j;
32         break;
33       }
34     }
35   }
36   a->diag = diag;
37   return 0;
38 }
39 
40 #include "draw.h"
41 #include "pinclude/pviewer.h"
42 #include "sys.h"
43 
44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
45 
46 #undef __FUNC__
47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
49                             PetscTruth *done)
50 {
51   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
52   int         ierr, n = a->mbs,i;
53 
54   *nn = n;
55   if (!ia) return 0;
56   if (symmetric) {
57     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
58   } else if (oshift == 1) {
59     /* temporarily add 1 to i and j indices */
60     int nz = a->i[n] + 1;
61     for ( i=0; i<nz; i++ ) a->j[i]++;
62     for ( i=0; i<n+1; i++ ) a->i[i]++;
63     *ia = a->i; *ja = a->j;
64   } else {
65     *ia = a->i; *ja = a->j;
66   }
67 
68   return 0;
69 }
70 
71 #undef __FUNC__
72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
74                                 PetscTruth *done)
75 {
76   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
77   int         i,n = a->mbs;
78 
79   if (!ia) return 0;
80   if (symmetric) {
81     PetscFree(*ia);
82     PetscFree(*ja);
83   } else if (oshift == 1) {
84     int nz = a->i[n];
85     for ( i=0; i<nz; i++ ) a->j[i]--;
86     for ( i=0; i<n+1; i++ ) a->i[i]--;
87   }
88   return 0;
89 }
90 
91 
92 #undef __FUNC__
93 #define __FUNC__ "MatView_SeqBAIJ_Binary"
94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
95 {
96   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
97   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
98   Scalar      *aa;
99 
100   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
101   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
102   col_lens[0] = MAT_COOKIE;
103 
104   col_lens[1] = a->m;
105   col_lens[2] = a->n;
106   col_lens[3] = a->nz*bs2;
107 
108   /* store lengths of each row and write (including header) to file */
109   count = 0;
110   for ( i=0; i<a->mbs; i++ ) {
111     for ( j=0; j<bs; j++ ) {
112       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
113     }
114   }
115   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
116   PetscFree(col_lens);
117 
118   /* store column indices (zero start index) */
119   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
120   count = 0;
121   for ( i=0; i<a->mbs; i++ ) {
122     for ( j=0; j<bs; j++ ) {
123       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
124         for ( l=0; l<bs; l++ ) {
125           jj[count++] = bs*a->j[k] + l;
126         }
127       }
128     }
129   }
130   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
131   PetscFree(jj);
132 
133   /* store nonzero values */
134   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
135   count = 0;
136   for ( i=0; i<a->mbs; i++ ) {
137     for ( j=0; j<bs; j++ ) {
138       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
139         for ( l=0; l<bs; l++ ) {
140           aa[count++] = a->a[bs2*k + l*bs + j];
141         }
142       }
143     }
144   }
145   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
146   PetscFree(aa);
147   return 0;
148 }
149 
150 #undef __FUNC__
151 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
153 {
154   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
155   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
156   FILE        *fd;
157   char        *outputname;
158 
159   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
160   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
161   ierr = ViewerGetFormat(viewer,&format);
162   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
163     fprintf(fd,"  block size is %d\n",bs);
164   }
165   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
166     SETERRQ(1,0,"Matlab format not supported");
167   }
168   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
169     for ( i=0; i<a->mbs; i++ ) {
170       for ( j=0; j<bs; j++ ) {
171         fprintf(fd,"row %d:",i*bs+j);
172         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
173           for ( l=0; l<bs; l++ ) {
174 #if defined(PETSC_COMPLEX)
175           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
176             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
177               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
178           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
179             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
180 #else
181           if (a->a[bs2*k + l*bs + j] != 0.0)
182             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
183 #endif
184           }
185         }
186         fprintf(fd,"\n");
187       }
188     }
189   }
190   else {
191     for ( i=0; i<a->mbs; i++ ) {
192       for ( j=0; j<bs; j++ ) {
193         fprintf(fd,"row %d:",i*bs+j);
194         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
195           for ( l=0; l<bs; l++ ) {
196 #if defined(PETSC_COMPLEX)
197           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
198             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
199               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
200           }
201           else {
202             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
203           }
204 #else
205             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
206 #endif
207           }
208         }
209         fprintf(fd,"\n");
210       }
211     }
212   }
213   fflush(fd);
214   return 0;
215 }
216 
217 #undef __FUNC__
218 #define __FUNC__ "MatView_SeqBAIJ_Draw"
219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
220 {
221   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
222   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
223   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
224   Scalar       *aa;
225   Draw         draw;
226   DrawButton   button;
227   PetscTruth   isnull;
228 
229   ViewerDrawGetDraw(viewer,&draw);
230   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
231 
232   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
233   xr += w;    yr += h;  xl = -w;     yl = -h;
234   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
235   /* loop over matrix elements drawing boxes */
236   color = DRAW_BLUE;
237   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
238     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
239       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
240       x_l = a->j[j]*bs; x_r = x_l + 1.0;
241       aa = a->a + j*bs2;
242       for ( k=0; k<bs; k++ ) {
243         for ( l=0; l<bs; l++ ) {
244           if (PetscReal(*aa++) >=  0.) continue;
245           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
246         }
247       }
248     }
249   }
250   color = DRAW_CYAN;
251   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
252     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
253       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
254       x_l = a->j[j]*bs; x_r = x_l + 1.0;
255       aa = a->a + j*bs2;
256       for ( k=0; k<bs; k++ ) {
257         for ( l=0; l<bs; l++ ) {
258           if (PetscReal(*aa++) != 0.) continue;
259           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
260         }
261       }
262     }
263   }
264 
265   color = DRAW_RED;
266   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
267     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
268       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
269       x_l = a->j[j]*bs; x_r = x_l + 1.0;
270       aa = a->a + j*bs2;
271       for ( k=0; k<bs; k++ ) {
272         for ( l=0; l<bs; l++ ) {
273           if (PetscReal(*aa++) <= 0.) continue;
274           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
275         }
276       }
277     }
278   }
279 
280   DrawFlush(draw);
281   DrawGetPause(draw,&pause);
282   if (pause >= 0) { PetscSleep(pause); return 0;}
283 
284   /* allow the matrix to zoom or shrink */
285   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
286   while (button != BUTTON_RIGHT) {
287     DrawClear(draw);
288     if (button == BUTTON_LEFT) scale = .5;
289     else if (button == BUTTON_CENTER) scale = 2.;
290     xl = scale*(xl + w - xc) + xc - w*scale;
291     xr = scale*(xr - w - xc) + xc + w*scale;
292     yl = scale*(yl + h - yc) + yc - h*scale;
293     yr = scale*(yr - h - yc) + yc + h*scale;
294     w *= scale; h *= scale;
295     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
296     color = DRAW_BLUE;
297     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
298       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
299         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
300         x_l = a->j[j]*bs; x_r = x_l + 1.0;
301         aa = a->a + j*bs2;
302         for ( k=0; k<bs; k++ ) {
303           for ( l=0; l<bs; l++ ) {
304             if (PetscReal(*aa++) >=  0.) continue;
305             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
306           }
307         }
308       }
309     }
310     color = DRAW_CYAN;
311     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
312       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
313         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
314         x_l = a->j[j]*bs; x_r = x_l + 1.0;
315         aa = a->a + j*bs2;
316         for ( k=0; k<bs; k++ ) {
317           for ( l=0; l<bs; l++ ) {
318           if (PetscReal(*aa++) != 0.) continue;
319           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
320           }
321         }
322       }
323     }
324 
325     color = DRAW_RED;
326     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
327       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
328         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
329         x_l = a->j[j]*bs; x_r = x_l + 1.0;
330         aa = a->a + j*bs2;
331         for ( k=0; k<bs; k++ ) {
332           for ( l=0; l<bs; l++ ) {
333             if (PetscReal(*aa++) <= 0.) continue;
334             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
335           }
336         }
337       }
338     }
339     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
340   }
341   return 0;
342 }
343 
344 #undef __FUNC__
345 #define __FUNC__ "MatView_SeqBAIJ"
346 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
347 {
348   Mat         A = (Mat) obj;
349   ViewerType  vtype;
350   int         ierr;
351 
352   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
353   if (vtype == MATLAB_VIEWER) {
354     SETERRQ(1,0,"Matlab viewer not supported");
355   }
356   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
357     return MatView_SeqBAIJ_ASCII(A,viewer);
358   }
359   else if (vtype == BINARY_FILE_VIEWER) {
360     return MatView_SeqBAIJ_Binary(A,viewer);
361   }
362   else if (vtype == DRAW_VIEWER) {
363     return MatView_SeqBAIJ_Draw(A,viewer);
364   }
365   return 0;
366 }
367 
368 #define CHUNKSIZE  10
369 
370 #undef __FUNC__
371 #define __FUNC__ "MatSetValues_SeqBAIJ"
372 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
373 {
374   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
375   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
376   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
377   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
378   int          ridx,cidx,bs2=a->bs2;
379   Scalar      *ap,value,*aa=a->a,*bap;
380 
381   for ( k=0; k<m; k++ ) { /* loop over added rows */
382     row  = im[k]; brow = row/bs;
383 #if defined(PETSC_BOPT_g)
384     if (row < 0) SETERRQ(1,0,"Negative row");
385     if (row >= a->m) SETERRQ(1,0,"Row too large");
386 #endif
387     rp   = aj + ai[brow];
388     ap   = aa + bs2*ai[brow];
389     rmax = imax[brow];
390     nrow = ailen[brow];
391     low  = 0;
392     for ( l=0; l<n; l++ ) { /* loop over added columns */
393 #if defined(PETSC_BOPT_g)
394       if (in[l] < 0) SETERRQ(1,0,"Negative column");
395       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
396 #endif
397       col = in[l]; bcol = col/bs;
398       ridx = row % bs; cidx = col % bs;
399       if (roworiented) {
400         value = *v++;
401       }
402       else {
403         value = v[k + l*m];
404       }
405       if (!sorted) low = 0; high = nrow;
406       while (high-low > 7) {
407         t = (low+high)/2;
408         if (rp[t] > bcol) high = t;
409         else              low  = t;
410       }
411       for ( i=low; i<high; i++ ) {
412         if (rp[i] > bcol) break;
413         if (rp[i] == bcol) {
414           bap  = ap +  bs2*i + bs*cidx + ridx;
415           if (is == ADD_VALUES) *bap += value;
416           else                  *bap  = value;
417           goto noinsert;
418         }
419       }
420       if (nonew) goto noinsert;
421       if (nrow >= rmax) {
422         /* there is no extra room in row, therefore enlarge */
423         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
424         Scalar *new_a;
425 
426         /* malloc new storage space */
427         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
428         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
429         new_j   = (int *) (new_a + bs2*new_nz);
430         new_i   = new_j + new_nz;
431 
432         /* copy over old data into new slots */
433         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
434         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
435         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
436         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
437         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
438                                                            len*sizeof(int));
439         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
440         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
441         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
442                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
443         /* free up old matrix storage */
444         PetscFree(a->a);
445         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
446         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
447         a->singlemalloc = 1;
448 
449         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
450         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
451         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
452         a->maxnz += bs2*CHUNKSIZE;
453         a->reallocs++;
454         a->nz++;
455       }
456       N = nrow++ - 1;
457       /* shift up all the later entries in this row */
458       for ( ii=N; ii>=i; ii-- ) {
459         rp[ii+1] = rp[ii];
460         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
461       }
462       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
463       rp[i]                      = bcol;
464       ap[bs2*i + bs*cidx + ridx] = value;
465       noinsert:;
466       low = i;
467     }
468     ailen[brow] = nrow;
469   }
470   return 0;
471 }
472 
473 #undef __FUNC__
474 #define __FUNC__ "MatGetSize_SeqBAIJ"
475 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
476 {
477   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
478   *m = a->m; *n = a->n;
479   return 0;
480 }
481 
482 #undef __FUNC__
483 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
484 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
485 {
486   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
487   *m = 0; *n = a->m;
488   return 0;
489 }
490 
491 #undef __FUNC__
492 #define __FUNC__ "MatGetRow_SeqBAIJ"
493 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
494 {
495   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
496   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
497   Scalar      *aa,*v_i,*aa_i;
498 
499   bs  = a->bs;
500   ai  = a->i;
501   aj  = a->j;
502   aa  = a->a;
503   bs2 = a->bs2;
504 
505   if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range");
506 
507   bn  = row/bs;   /* Block number */
508   bp  = row % bs; /* Block Position */
509   M   = ai[bn+1] - ai[bn];
510   *nz = bs*M;
511 
512   if (v) {
513     *v = 0;
514     if (*nz) {
515       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
516       for ( i=0; i<M; i++ ) { /* for each block in the block row */
517         v_i  = *v + i*bs;
518         aa_i = aa + bs2*(ai[bn] + i);
519         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
520       }
521     }
522   }
523 
524   if (idx) {
525     *idx = 0;
526     if (*nz) {
527       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
528       for ( i=0; i<M; i++ ) { /* for each block in the block row */
529         idx_i = *idx + i*bs;
530         itmp  = bs*aj[ai[bn] + i];
531         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
532       }
533     }
534   }
535   return 0;
536 }
537 
538 #undef __FUNC__
539 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
540 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
541 {
542   if (idx) {if (*idx) PetscFree(*idx);}
543   if (v)   {if (*v)   PetscFree(*v);}
544   return 0;
545 }
546 
547 #undef __FUNC__
548 #define __FUNC__ "MatTranspose_SeqBAIJ"
549 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
550 {
551   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
552   Mat         C;
553   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
554   int         *rows,*cols,bs2=a->bs2;
555   Scalar      *array=a->a;
556 
557   if (B==PETSC_NULL && mbs!=nbs)
558     SETERRQ(1,0,"Square matrix only for in-place");
559   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
560   PetscMemzero(col,(1+nbs)*sizeof(int));
561 
562   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
563   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
564   PetscFree(col);
565   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
566   cols = rows + bs;
567   for ( i=0; i<mbs; i++ ) {
568     cols[0] = i*bs;
569     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
570     len = ai[i+1] - ai[i];
571     for ( j=0; j<len; j++ ) {
572       rows[0] = (*aj++)*bs;
573       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
574       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
575       array += bs2;
576     }
577   }
578   PetscFree(rows);
579 
580   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
581   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
582 
583   if (B != PETSC_NULL) {
584     *B = C;
585   } else {
586     /* This isn't really an in-place transpose */
587     PetscFree(a->a);
588     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
589     if (a->diag) PetscFree(a->diag);
590     if (a->ilen) PetscFree(a->ilen);
591     if (a->imax) PetscFree(a->imax);
592     if (a->solve_work) PetscFree(a->solve_work);
593     PetscFree(a);
594     PetscMemcpy(A,C,sizeof(struct _Mat));
595     PetscHeaderDestroy(C);
596   }
597   return 0;
598 }
599 
600 
601 #undef __FUNC__
602 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
603 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
604 {
605   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
606   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
607   int        m = a->m,*ip, N, *ailen = a->ilen;
608   int        mbs = a->mbs, bs2 = a->bs2;
609   Scalar     *aa = a->a, *ap;
610 
611   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
612 
613   for ( i=1; i<mbs; i++ ) {
614     /* move each row back by the amount of empty slots (fshift) before it*/
615     fshift += imax[i-1] - ailen[i-1];
616     if (fshift) {
617       ip = aj + ai[i]; ap = aa + bs2*ai[i];
618       N = ailen[i];
619       for ( j=0; j<N; j++ ) {
620         ip[j-fshift] = ip[j];
621         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
622       }
623     }
624     ai[i] = ai[i-1] + ailen[i-1];
625   }
626   if (mbs) {
627     fshift += imax[mbs-1] - ailen[mbs-1];
628     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
629   }
630   /* reset ilen and imax for each row */
631   for ( i=0; i<mbs; i++ ) {
632     ailen[i] = imax[i] = ai[i+1] - ai[i];
633   }
634   a->nz = ai[mbs];
635 
636   /* diagonals may have moved, so kill the diagonal pointers */
637   if (fshift && a->diag) {
638     PetscFree(a->diag);
639     PLogObjectMemory(A,-(m+1)*sizeof(int));
640     a->diag = 0;
641   }
642   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
643            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
644   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
645            a->reallocs);
646   a->reallocs          = 0;
647   A->info.nz_unneeded  = (double)fshift*bs2;
648 
649   return 0;
650 }
651 
652 #undef __FUNC__
653 #define __FUNC__ "MatZeroEntries_SeqBAIJ"
654 static int MatZeroEntries_SeqBAIJ(Mat A)
655 {
656   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
657   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
658   return 0;
659 }
660 
661 #undef __FUNC__
662 #define __FUNC__ "MatDestroy_SeqBAIJ"
663 int MatDestroy_SeqBAIJ(PetscObject obj)
664 {
665   Mat         A  = (Mat) obj;
666   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
667   int         ierr;
668 
669 #if defined(PETSC_LOG)
670   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
671 #endif
672   PetscFree(a->a);
673   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
674   if (a->diag) PetscFree(a->diag);
675   if (a->ilen) PetscFree(a->ilen);
676   if (a->imax) PetscFree(a->imax);
677   if (a->solve_work) PetscFree(a->solve_work);
678   if (a->mult_work) PetscFree(a->mult_work);
679   PetscFree(a);
680   if (A->mapping) {
681     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
682   }
683   PLogObjectDestroy(A);
684   PetscHeaderDestroy(A);
685   return 0;
686 }
687 
688 #undef __FUNC__
689 #define __FUNC__ "MatSetOption_SeqBAIJ"
690 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
691 {
692   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
693   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
694   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
695   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
696   else if (op == MAT_COLUMNS_UNSORTED)          a->sorted      = 0;
697   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
698   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
699   else if (op == MAT_ROWS_SORTED ||
700            op == MAT_ROWS_UNSORTED ||
701            op == MAT_SYMMETRIC ||
702            op == MAT_STRUCTURALLY_SYMMETRIC ||
703            op == MAT_YES_NEW_DIAGONALS ||
704            op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES)
705     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
706   else if (op == MAT_NO_NEW_DIAGONALS)
707     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
708   else
709     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
710   return 0;
711 }
712 
713 
714 /* -------------------------------------------------------*/
715 /* Should check that shapes of vectors and matrices match */
716 /* -------------------------------------------------------*/
717 #include "pinclude/plapack.h"
718 
719 #undef __FUNC__
720 #define __FUNC__ "MatMult_SeqBAIJ_1"
721 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
722 {
723   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
724   register Scalar *x,*z,*v,sum;
725   int             mbs=a->mbs,i,*idx,*ii,n;
726 
727   VecGetArray_Fast(xx,x);
728   VecGetArray_Fast(zz,z);
729 
730   idx   = a->j;
731   v     = a->a;
732   ii    = a->i;
733 
734   for ( i=0; i<mbs; i++ ) {
735     n    = ii[1] - ii[0]; ii++;
736     sum  = 0.0;
737     while (n--) sum += *v++ * x[*idx++];
738     z[i] = sum;
739   }
740   VecRestoreArray_Fast(xx,x);
741   VecRestoreArray_Fast(zz,z);
742   PLogFlops(2*a->nz - a->m);
743   return 0;
744 }
745 
746 #undef __FUNC__
747 #define __FUNC__ "MatMult_SeqBAIJ_2"
748 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
749 {
750   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
751   register Scalar *x,*z,*v,*xb,sum1,sum2;
752   register Scalar x1,x2;
753   int             mbs=a->mbs,i,*idx,*ii;
754   int             j,n;
755 
756   VecGetArray_Fast(xx,x);
757   VecGetArray_Fast(zz,z);
758 
759   idx   = a->j;
760   v     = a->a;
761   ii    = a->i;
762 
763   for ( i=0; i<mbs; i++ ) {
764     n  = ii[1] - ii[0]; ii++;
765     sum1 = 0.0; sum2 = 0.0;
766     for ( j=0; j<n; j++ ) {
767       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
768       sum1 += v[0]*x1 + v[2]*x2;
769       sum2 += v[1]*x1 + v[3]*x2;
770       v += 4;
771     }
772     z[0] = sum1; z[1] = sum2;
773     z += 2;
774   }
775   VecRestoreArray_Fast(xx,x);
776   VecRestoreArray_Fast(zz,z);
777   PLogFlops(4*a->nz - a->m);
778   return 0;
779 }
780 
781 #undef __FUNC__
782 #define __FUNC__ "MatMult_SeqBAIJ_3"
783 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
784 {
785   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
786   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
787   int             mbs=a->mbs,i,*idx,*ii,j,n;
788 
789   VecGetArray_Fast(xx,x);
790   VecGetArray_Fast(zz,z);
791 
792   idx   = a->j;
793   v     = a->a;
794   ii    = a->i;
795 
796   for ( i=0; i<mbs; i++ ) {
797     n  = ii[1] - ii[0]; ii++;
798     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
799     for ( j=0; j<n; j++ ) {
800       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
801       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
802       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
803       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
804       v += 9;
805     }
806     z[0] = sum1; z[1] = sum2; z[2] = sum3;
807     z += 3;
808   }
809   VecRestoreArray_Fast(xx,x);
810   VecRestoreArray_Fast(zz,z);
811   PLogFlops(18*a->nz - a->m);
812   return 0;
813 }
814 
815 #undef __FUNC__
816 #define __FUNC__ "MatMult_SeqBAIJ_4"
817 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
818 {
819   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
820   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
821   register Scalar x1,x2,x3,x4;
822   int             mbs=a->mbs,i,*idx,*ii;
823   int             j,n;
824 
825   VecGetArray_Fast(xx,x);
826   VecGetArray_Fast(zz,z);
827 
828   idx   = a->j;
829   v     = a->a;
830   ii    = a->i;
831 
832   for ( i=0; i<mbs; i++ ) {
833     n  = ii[1] - ii[0]; ii++;
834     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
835     for ( j=0; j<n; j++ ) {
836       xb = x + 4*(*idx++);
837       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
838       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
839       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
840       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
841       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
842       v += 16;
843     }
844     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
845     z += 4;
846   }
847   VecRestoreArray_Fast(xx,x);
848   VecRestoreArray_Fast(zz,z);
849   PLogFlops(32*a->nz - a->m);
850   return 0;
851 }
852 
853 #undef __FUNC__
854 #define __FUNC__ "MatMult_SeqBAIJ_5"
855 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
856 {
857   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
858   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
859   register Scalar x1,x2,x3,x4,x5;
860   int             mbs=a->mbs,i,*idx,*ii,j,n;
861 
862   VecGetArray_Fast(xx,x);
863   VecGetArray_Fast(zz,z);
864 
865   idx   = a->j;
866   v     = a->a;
867   ii    = a->i;
868 
869   for ( i=0; i<mbs; i++ ) {
870     n  = ii[1] - ii[0]; ii++;
871     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
872     for ( j=0; j<n; j++ ) {
873       xb = x + 5*(*idx++);
874       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
875       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
876       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
877       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
878       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
879       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
880       v += 25;
881     }
882     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
883     z += 5;
884   }
885   VecRestoreArray_Fast(xx,x);
886   VecRestoreArray_Fast(zz,z);
887   PLogFlops(50*a->nz - a->m);
888   return 0;
889 }
890 
891 #undef __FUNC__
892 #define __FUNC__ "MatMult_SeqBAIJ_N"
893 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
894 {
895   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
896   register Scalar *x,*z,*v,*xb;
897   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
898   int             _One = 1,ncols,k;
899   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
900 
901   VecGetArray_Fast(xx,x);
902   VecGetArray_Fast(zz,z);
903 
904   idx   = a->j;
905   v     = a->a;
906   ii    = a->i;
907 
908 
909   if (!a->mult_work) {
910     k = PetscMax(a->m,a->n);
911     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
912   }
913   work = a->mult_work;
914   for ( i=0; i<mbs; i++ ) {
915     n     = ii[1] - ii[0]; ii++;
916     ncols = n*bs;
917     workt = work;
918     for ( j=0; j<n; j++ ) {
919       xb = x + bs*(*idx++);
920       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
921       workt += bs;
922     }
923     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
924     v += n*bs2;
925     z += bs;
926   }
927   VecRestoreArray_Fast(xx,x);
928   VecRestoreArray_Fast(zz,z);
929   PLogFlops(2*a->nz*bs2 - a->m);
930   return 0;
931 }
932 
933 #undef __FUNC__
934 #define __FUNC__ "MatMultAdd_SeqBAIJ_1"
935 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
936 {
937   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
938   register Scalar *x,*y,*z,*v,sum;
939   int             mbs=a->mbs,i,*idx,*ii,n;
940 
941   VecGetArray_Fast(xx,x);
942   VecGetArray_Fast(yy,y);
943   VecGetArray_Fast(zz,z);
944 
945   idx   = a->j;
946   v     = a->a;
947   ii    = a->i;
948 
949   for ( i=0; i<mbs; i++ ) {
950     n    = ii[1] - ii[0]; ii++;
951     sum  = y[i];
952     while (n--) sum += *v++ * x[*idx++];
953     z[i] = sum;
954   }
955   VecRestoreArray_Fast(xx,x);
956   VecRestoreArray_Fast(yy,y);
957   VecRestoreArray_Fast(zz,z);
958   PLogFlops(2*a->nz);
959   return 0;
960 }
961 
962 #undef __FUNC__
963 #define __FUNC__ "MatMultAdd_SeqBAIJ_2"
964 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
965 {
966   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
967   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
968   register Scalar x1,x2;
969   int             mbs=a->mbs,i,*idx,*ii;
970   int             j,n;
971 
972   VecGetArray_Fast(xx,x);
973   VecGetArray_Fast(yy,y);
974   VecGetArray_Fast(zz,z);
975 
976   idx   = a->j;
977   v     = a->a;
978   ii    = a->i;
979 
980   for ( i=0; i<mbs; i++ ) {
981     n  = ii[1] - ii[0]; ii++;
982     sum1 = y[0]; sum2 = y[1];
983     for ( j=0; j<n; j++ ) {
984       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
985       sum1 += v[0]*x1 + v[2]*x2;
986       sum2 += v[1]*x1 + v[3]*x2;
987       v += 4;
988     }
989     z[0] = sum1; z[1] = sum2;
990     z += 2; y += 2;
991   }
992   VecRestoreArray_Fast(xx,x);
993   VecRestoreArray_Fast(yy,y);
994   VecRestoreArray_Fast(zz,z);
995   PLogFlops(4*a->nz);
996   return 0;
997 }
998 
999 #undef __FUNC__
1000 #define __FUNC__ "MatMultAdd_SeqBAIJ_3"
1001 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1002 {
1003   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1004   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1005   int             mbs=a->mbs,i,*idx,*ii,j,n;
1006 
1007   VecGetArray_Fast(xx,x);
1008   VecGetArray_Fast(yy,y);
1009   VecGetArray_Fast(zz,z);
1010 
1011   idx   = a->j;
1012   v     = a->a;
1013   ii    = a->i;
1014 
1015   for ( i=0; i<mbs; i++ ) {
1016     n  = ii[1] - ii[0]; ii++;
1017     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1018     for ( j=0; j<n; j++ ) {
1019       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1020       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1021       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1022       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1023       v += 9;
1024     }
1025     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1026     z += 3; y += 3;
1027   }
1028   VecRestoreArray_Fast(xx,x);
1029   VecRestoreArray_Fast(yy,y);
1030   VecRestoreArray_Fast(zz,z);
1031   PLogFlops(18*a->nz);
1032   return 0;
1033 }
1034 
1035 #undef __FUNC__
1036 #define __FUNC__ "MatMultAdd_SeqBAIJ_4"
1037 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1038 {
1039   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1040   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
1041   register Scalar x1,x2,x3,x4;
1042   int             mbs=a->mbs,i,*idx,*ii;
1043   int             j,n;
1044 
1045   VecGetArray_Fast(xx,x);
1046   VecGetArray_Fast(yy,y);
1047   VecGetArray_Fast(zz,z);
1048 
1049   idx   = a->j;
1050   v     = a->a;
1051   ii    = a->i;
1052 
1053   for ( i=0; i<mbs; i++ ) {
1054     n  = ii[1] - ii[0]; ii++;
1055     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1056     for ( j=0; j<n; j++ ) {
1057       xb = x + 4*(*idx++);
1058       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1059       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1060       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1061       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1062       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1063       v += 16;
1064     }
1065     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1066     z += 4; y += 4;
1067   }
1068   VecRestoreArray_Fast(xx,x);
1069   VecRestoreArray_Fast(yy,y);
1070   VecRestoreArray_Fast(zz,z);
1071   PLogFlops(32*a->nz);
1072   return 0;
1073 }
1074 
1075 #undef __FUNC__
1076 #define __FUNC__ "MatMultAdd_SeqBAIJ_5"
1077 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1078 {
1079   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1080   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1081   register Scalar x1,x2,x3,x4,x5;
1082   int             mbs=a->mbs,i,*idx,*ii,j,n;
1083 
1084   VecGetArray_Fast(xx,x);
1085   VecGetArray_Fast(yy,y);
1086   VecGetArray_Fast(zz,z);
1087 
1088   idx   = a->j;
1089   v     = a->a;
1090   ii    = a->i;
1091 
1092   for ( i=0; i<mbs; i++ ) {
1093     n  = ii[1] - ii[0]; ii++;
1094     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1095     for ( j=0; j<n; j++ ) {
1096       xb = x + 5*(*idx++);
1097       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1098       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1099       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1100       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1101       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1102       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1103       v += 25;
1104     }
1105     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1106     z += 5; y += 5;
1107   }
1108   VecRestoreArray_Fast(xx,x);
1109   VecRestoreArray_Fast(yy,y);
1110   VecRestoreArray_Fast(zz,z);
1111   PLogFlops(50*a->nz);
1112   return 0;
1113 }
1114 
1115 #undef __FUNC__
1116 #define __FUNC__ "MatMultAdd_SeqBAIJ_N"
1117 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1118 {
1119   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1120   register Scalar *x,*z,*v,*xb;
1121   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1122   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1123 
1124   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1125 
1126   VecGetArray_Fast(xx,x);
1127   VecGetArray_Fast(zz,z);
1128 
1129   idx   = a->j;
1130   v     = a->a;
1131   ii    = a->i;
1132 
1133 
1134   if (!a->mult_work) {
1135     k = PetscMax(a->m,a->n);
1136     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1137   }
1138   work = a->mult_work;
1139   for ( i=0; i<mbs; i++ ) {
1140     n     = ii[1] - ii[0]; ii++;
1141     ncols = n*bs;
1142     workt = work;
1143     for ( j=0; j<n; j++ ) {
1144       xb = x + bs*(*idx++);
1145       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1146       workt += bs;
1147     }
1148     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1149     v += n*bs2;
1150     z += bs;
1151   }
1152   VecRestoreArray_Fast(xx,x);
1153   VecRestoreArray_Fast(zz,z);
1154   PLogFlops(2*a->nz*bs2 );
1155   return 0;
1156 }
1157 
1158 #undef __FUNC__
1159 #define __FUNC__ "MatMultTrans_SeqBAIJ"
1160 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1161 {
1162   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1163   Scalar          *xg,*zg,*zb;
1164   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1165   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1166   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1167 
1168 
1169   VecGetArray_Fast(xx,xg); x = xg;
1170   VecGetArray_Fast(zz,zg); z = zg;
1171   PetscMemzero(z,N*sizeof(Scalar));
1172 
1173   idx   = a->j;
1174   v     = a->a;
1175   ii    = a->i;
1176 
1177   switch (bs) {
1178   case 1:
1179     for ( i=0; i<mbs; i++ ) {
1180       n  = ii[1] - ii[0]; ii++;
1181       xb = x + i; x1 = xb[0];
1182       ib = idx + ai[i];
1183       for ( j=0; j<n; j++ ) {
1184         rval    = ib[j];
1185         z[rval] += *v++ * x1;
1186       }
1187     }
1188     break;
1189   case 2:
1190     for ( i=0; i<mbs; i++ ) {
1191       n  = ii[1] - ii[0]; ii++;
1192       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1193       ib = idx + ai[i];
1194       for ( j=0; j<n; j++ ) {
1195         rval      = ib[j]*2;
1196         z[rval++] += v[0]*x1 + v[1]*x2;
1197         z[rval++] += v[2]*x1 + v[3]*x2;
1198         v += 4;
1199       }
1200     }
1201     break;
1202   case 3:
1203     for ( i=0; i<mbs; i++ ) {
1204       n  = ii[1] - ii[0]; ii++;
1205       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1206       ib = idx + ai[i];
1207       for ( j=0; j<n; j++ ) {
1208         rval      = ib[j]*3;
1209         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1210         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1211         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1212         v += 9;
1213       }
1214     }
1215     break;
1216   case 4:
1217     for ( i=0; i<mbs; i++ ) {
1218       n  = ii[1] - ii[0]; ii++;
1219       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1220       ib = idx + ai[i];
1221       for ( j=0; j<n; j++ ) {
1222         rval      = ib[j]*4;
1223         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1224         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1225         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1226         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1227         v += 16;
1228       }
1229     }
1230     break;
1231   case 5:
1232     for ( i=0; i<mbs; i++ ) {
1233       n  = ii[1] - ii[0]; ii++;
1234       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1235       x4 = xb[3];   x5 = xb[4];
1236       ib = idx + ai[i];
1237       for ( j=0; j<n; j++ ) {
1238         rval      = ib[j]*5;
1239         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1240         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1241         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1242         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1243         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1244         v += 25;
1245       }
1246     }
1247     break;
1248       /* block sizes larger then 5 by 5 are handled by BLAS */
1249     default: {
1250       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1251       if (!a->mult_work) {
1252         k = PetscMax(a->m,a->n);
1253         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1254         CHKPTRQ(a->mult_work);
1255       }
1256       work = a->mult_work;
1257       for ( i=0; i<mbs; i++ ) {
1258         n     = ii[1] - ii[0]; ii++;
1259         ncols = n*bs;
1260         PetscMemzero(work,ncols*sizeof(Scalar));
1261         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1262         v += n*bs2;
1263         x += bs;
1264         workt = work;
1265         for ( j=0; j<n; j++ ) {
1266           zb = z + bs*(*idx++);
1267           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1268           workt += bs;
1269         }
1270       }
1271     }
1272   }
1273   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1274   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1275   PLogFlops(2*a->nz*a->bs2 - a->n);
1276   return 0;
1277 }
1278 
1279 #undef __FUNC__
1280 #define __FUNC__ "MatMultTransAdd_SeqBAIJ"
1281 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1282 {
1283   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1284   Scalar          *xg,*zg,*zb;
1285   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1286   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1287   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1288 
1289 
1290 
1291   VecGetArray_Fast(xx,xg); x = xg;
1292   VecGetArray_Fast(zz,zg); z = zg;
1293 
1294   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1295   else PetscMemzero(z,N*sizeof(Scalar));
1296 
1297 
1298   idx   = a->j;
1299   v     = a->a;
1300   ii    = a->i;
1301 
1302   switch (bs) {
1303   case 1:
1304     for ( i=0; i<mbs; i++ ) {
1305       n  = ii[1] - ii[0]; ii++;
1306       xb = x + i; x1 = xb[0];
1307       ib = idx + ai[i];
1308       for ( j=0; j<n; j++ ) {
1309         rval    = ib[j];
1310         z[rval] += *v++ * x1;
1311       }
1312     }
1313     break;
1314   case 2:
1315     for ( i=0; i<mbs; i++ ) {
1316       n  = ii[1] - ii[0]; ii++;
1317       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1318       ib = idx + ai[i];
1319       for ( j=0; j<n; j++ ) {
1320         rval      = ib[j]*2;
1321         z[rval++] += v[0]*x1 + v[1]*x2;
1322         z[rval++] += v[2]*x1 + v[3]*x2;
1323         v += 4;
1324       }
1325     }
1326     break;
1327   case 3:
1328     for ( i=0; i<mbs; i++ ) {
1329       n  = ii[1] - ii[0]; ii++;
1330       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1331       ib = idx + ai[i];
1332       for ( j=0; j<n; j++ ) {
1333         rval      = ib[j]*3;
1334         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1335         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1336         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1337         v += 9;
1338       }
1339     }
1340     break;
1341   case 4:
1342     for ( i=0; i<mbs; i++ ) {
1343       n  = ii[1] - ii[0]; ii++;
1344       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1345       ib = idx + ai[i];
1346       for ( j=0; j<n; j++ ) {
1347         rval      = ib[j]*4;
1348         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1349         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1350         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1351         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1352         v += 16;
1353       }
1354     }
1355     break;
1356   case 5:
1357     for ( i=0; i<mbs; i++ ) {
1358       n  = ii[1] - ii[0]; ii++;
1359       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1360       x4 = xb[3];   x5 = xb[4];
1361       ib = idx + ai[i];
1362       for ( j=0; j<n; j++ ) {
1363         rval      = ib[j]*5;
1364         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1365         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1366         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1367         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1368         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1369         v += 25;
1370       }
1371     }
1372     break;
1373       /* block sizes larger then 5 by 5 are handled by BLAS */
1374     default: {
1375       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1376       if (!a->mult_work) {
1377         k = PetscMax(a->m,a->n);
1378         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1379         CHKPTRQ(a->mult_work);
1380       }
1381       work = a->mult_work;
1382       for ( i=0; i<mbs; i++ ) {
1383         n     = ii[1] - ii[0]; ii++;
1384         ncols = n*bs;
1385         PetscMemzero(work,ncols*sizeof(Scalar));
1386         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1387         v += n*bs2;
1388         x += bs;
1389         workt = work;
1390         for ( j=0; j<n; j++ ) {
1391           zb = z + bs*(*idx++);
1392           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1393           workt += bs;
1394         }
1395       }
1396     }
1397   }
1398   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1399   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1400   PLogFlops(2*a->nz*a->bs2);
1401   return 0;
1402 }
1403 
1404 #undef __FUNC__
1405 #define __FUNC__ "MatGetInfo_SeqBAIJ"
1406 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1407 {
1408   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1409 
1410   info->rows_global    = (double)a->m;
1411   info->columns_global = (double)a->n;
1412   info->rows_local     = (double)a->m;
1413   info->columns_local  = (double)a->n;
1414   info->block_size     = a->bs2;
1415   info->nz_allocated   = a->maxnz;
1416   info->nz_used        = a->bs2*a->nz;
1417   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1418   /*  if (info->nz_unneeded != A->info.nz_unneeded)
1419     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
1420   info->assemblies   = A->num_ass;
1421   info->mallocs      = a->reallocs;
1422   info->memory       = A->mem;
1423   if (A->factor) {
1424     info->fill_ratio_given  = A->info.fill_ratio_given;
1425     info->fill_ratio_needed = A->info.fill_ratio_needed;
1426     info->factor_mallocs    = A->info.factor_mallocs;
1427   } else {
1428     info->fill_ratio_given  = 0;
1429     info->fill_ratio_needed = 0;
1430     info->factor_mallocs    = 0;
1431   }
1432   return 0;
1433 }
1434 
1435 #undef __FUNC__
1436 #define __FUNC__ "MatEqual_SeqBAIJ"
1437 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1438 {
1439   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1440 
1441   if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type");
1442 
1443   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1444   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1445       (a->nz != b->nz)) {
1446     *flg = PETSC_FALSE; return 0;
1447   }
1448 
1449   /* if the a->i are the same */
1450   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1451     *flg = PETSC_FALSE; return 0;
1452   }
1453 
1454   /* if a->j are the same */
1455   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1456     *flg = PETSC_FALSE; return 0;
1457   }
1458 
1459   /* if a->a are the same */
1460   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1461     *flg = PETSC_FALSE; return 0;
1462   }
1463   *flg = PETSC_TRUE;
1464   return 0;
1465 
1466 }
1467 
1468 #undef __FUNC__
1469 #define __FUNC__ "MatGetDiagonal_SeqBAIJ"
1470 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1471 {
1472   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1473   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1474   Scalar      *x, zero = 0.0,*aa,*aa_j;
1475 
1476   if (A->factor) SETERRQ(1,0,"Not for factored matrix");
1477   bs   = a->bs;
1478   aa   = a->a;
1479   ai   = a->i;
1480   aj   = a->j;
1481   ambs = a->mbs;
1482   bs2  = a->bs2;
1483 
1484   VecSet(&zero,v);
1485   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1486   if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector");
1487   for ( i=0; i<ambs; i++ ) {
1488     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1489       if (aj[j] == i) {
1490         row  = i*bs;
1491         aa_j = aa+j*bs2;
1492         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1493         break;
1494       }
1495     }
1496   }
1497   return 0;
1498 }
1499 
1500 #undef __FUNC__
1501 #define __FUNC__ "MatDiagonalScale_SeqBAIJ"
1502 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1503 {
1504   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1505   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1506   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1507 
1508   ai  = a->i;
1509   aj  = a->j;
1510   aa  = a->a;
1511   m   = a->m;
1512   n   = a->n;
1513   bs  = a->bs;
1514   mbs = a->mbs;
1515   bs2 = a->bs2;
1516   if (ll) {
1517     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1518     if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length");
1519     for ( i=0; i<mbs; i++ ) { /* for each block row */
1520       M  = ai[i+1] - ai[i];
1521       li = l + i*bs;
1522       v  = aa + bs2*ai[i];
1523       for ( j=0; j<M; j++ ) { /* for each block */
1524         for ( k=0; k<bs2; k++ ) {
1525           (*v++) *= li[k%bs];
1526         }
1527       }
1528     }
1529   }
1530 
1531   if (rr) {
1532     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1533     if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length");
1534     for ( i=0; i<mbs; i++ ) { /* for each block row */
1535       M  = ai[i+1] - ai[i];
1536       v  = aa + bs2*ai[i];
1537       for ( j=0; j<M; j++ ) { /* for each block */
1538         ri = r + bs*aj[ai[i]+j];
1539         for ( k=0; k<bs; k++ ) {
1540           x = ri[k];
1541           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1542         }
1543       }
1544     }
1545   }
1546   return 0;
1547 }
1548 
1549 
1550 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1551 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1552 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1553 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1554 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1555 
1556 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1557 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1558 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1559 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1560 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1561 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1562 
1563 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1564 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1565 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1566 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1567 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1568 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1569 
1570 #undef __FUNC__
1571 #define __FUNC__ "MatNorm_SeqBAIJ"
1572 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1573 {
1574   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1575   Scalar      *v = a->a;
1576   double      sum = 0.0;
1577   int         i,nz=a->nz,bs2=a->bs2;
1578 
1579   if (type == NORM_FROBENIUS) {
1580     for (i=0; i< bs2*nz; i++ ) {
1581 #if defined(PETSC_COMPLEX)
1582       sum += real(conj(*v)*(*v)); v++;
1583 #else
1584       sum += (*v)*(*v); v++;
1585 #endif
1586     }
1587     *norm = sqrt(sum);
1588   }
1589   else {
1590     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
1591   }
1592   return 0;
1593 }
1594 
1595 /*
1596      note: This can only work for identity for row and col. It would
1597    be good to check this and otherwise generate an error.
1598 */
1599 #undef __FUNC__
1600 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1601 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1602 {
1603   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1604   Mat         outA;
1605   int         ierr;
1606 
1607   if (fill != 0) SETERRQ(1,0,"Only fill=0 supported");
1608 
1609   outA          = inA;
1610   inA->factor   = FACTOR_LU;
1611   a->row        = row;
1612   a->col        = col;
1613 
1614   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1615 
1616   if (!a->diag) {
1617     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1618   }
1619   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1620   return 0;
1621 }
1622 
1623 #undef __FUNC__
1624 #define __FUNC__ "MatScale_SeqBAIJ"
1625 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1626 {
1627   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1628   int         one = 1, totalnz = a->bs2*a->nz;
1629   BLscal_( &totalnz, alpha, a->a, &one );
1630   PLogFlops(totalnz);
1631   return 0;
1632 }
1633 
1634 #undef __FUNC__
1635 #define __FUNC__ "MatGetValues_SeqBAIJ"
1636 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1637 {
1638   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1639   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1640   int        *ai = a->i, *ailen = a->ilen;
1641   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1642   Scalar     *ap, *aa = a->a, zero = 0.0;
1643 
1644   for ( k=0; k<m; k++ ) { /* loop over rows */
1645     row  = im[k]; brow = row/bs;
1646     if (row < 0) SETERRQ(1,0,"Negative row");
1647     if (row >= a->m) SETERRQ(1,0,"Row too large");
1648     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1649     nrow = ailen[brow];
1650     for ( l=0; l<n; l++ ) { /* loop over columns */
1651       if (in[l] < 0) SETERRQ(1,0,"Negative column");
1652       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
1653       col  = in[l] ;
1654       bcol = col/bs;
1655       cidx = col%bs;
1656       ridx = row%bs;
1657       high = nrow;
1658       low  = 0; /* assume unsorted */
1659       while (high-low > 5) {
1660         t = (low+high)/2;
1661         if (rp[t] > bcol) high = t;
1662         else             low  = t;
1663       }
1664       for ( i=low; i<high; i++ ) {
1665         if (rp[i] > bcol) break;
1666         if (rp[i] == bcol) {
1667           *v++ = ap[bs2*i+bs*cidx+ridx];
1668           goto finished;
1669         }
1670       }
1671       *v++ = zero;
1672       finished:;
1673     }
1674   }
1675   return 0;
1676 }
1677 
1678 #undef __FUNC__
1679 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
1680 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1681 {
1682   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1683   *bs = baij->bs;
1684   return 0;
1685 }
1686 
1687 /* idx should be of length atlease bs */
1688 #undef __FUNC__
1689 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
1690 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1691 {
1692   int i,row;
1693   row = idx[0];
1694   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1695 
1696   for ( i=1; i<bs; i++ ) {
1697     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1698   }
1699   *flg = PETSC_TRUE;
1700   return 0;
1701 }
1702 
1703 #undef __FUNC__
1704 #define __FUNC__ "MatZeroRows_SeqBAIJ"
1705 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1706 {
1707   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1708   IS          is_local;
1709   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1710   PetscTruth  flg;
1711   Scalar      zero = 0.0,*aa;
1712 
1713   /* Make a copy of the IS and  sort it */
1714   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1715   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1716   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1717   ierr = ISSort(is_local); CHKERRQ(ierr);
1718   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1719 
1720   i = 0;
1721   while (i < is_n) {
1722     if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range");
1723     flg = PETSC_FALSE;
1724     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1725     if (flg) { /* There exists a block of rows to be Zerowed */
1726       baij->ilen[rows[i]/bs] = 0;
1727       i += bs;
1728     } else { /* Zero out only the requested row */
1729       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1730       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1731       for ( j=0; j<count; j++ ) {
1732         aa[0] = zero;
1733         aa+=bs;
1734       }
1735       i++;
1736     }
1737   }
1738   if (diag) {
1739     for ( j=0; j<is_n; j++ ) {
1740       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1741     }
1742   }
1743   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1744   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1745   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1746   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1747 
1748   return 0;
1749 }
1750 
1751 #undef __FUNC__
1752 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1753 int MatPrintHelp_SeqBAIJ(Mat A)
1754 {
1755   static int called = 0;
1756   MPI_Comm   comm = A->comm;
1757 
1758   if (called) return 0; else called = 1;
1759   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1760   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1761   return 0;
1762 }
1763 
1764 /* -------------------------------------------------------------------*/
1765 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1766        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1767        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1768        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1769        MatSolve_SeqBAIJ_N,0,
1770        0,0,
1771        MatLUFactor_SeqBAIJ,0,
1772        0,
1773        MatTranspose_SeqBAIJ,
1774        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1775        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1776        0,MatAssemblyEnd_SeqBAIJ,
1777        0,
1778        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
1779        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1780        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1781        MatILUFactorSymbolic_SeqBAIJ,0,
1782        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1783        MatConvertSameType_SeqBAIJ,0,0,
1784        MatILUFactor_SeqBAIJ,0,0,
1785        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
1786        MatGetValues_SeqBAIJ,0,
1787        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1788        0,0,0,MatGetBlockSize_SeqBAIJ,
1789        MatGetRowIJ_SeqBAIJ,
1790        MatRestoreRowIJ_SeqBAIJ};
1791 
1792 #undef __FUNC__
1793 #define __FUNC__ "MatCreateSeqBAIJ"
1794 /*@C
1795    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1796    compressed row) format.  For good matrix assembly performance the
1797    user should preallocate the matrix storage by setting the parameter nz
1798    (or the array nzz).  By setting these parameters accurately, performance
1799    during matrix assembly can be increased by more than a factor of 50.
1800 
1801    Input Parameters:
1802 .  comm - MPI communicator, set to MPI_COMM_SELF
1803 .  bs - size of block
1804 .  m - number of rows
1805 .  n - number of columns
1806 .  nz - number of block nonzeros per block row (same for all rows)
1807 .  nzz - array containing the number of block nonzeros in the various block rows
1808          (possibly different for each block row) or PETSC_NULL
1809 
1810    Output Parameter:
1811 .  A - the matrix
1812 
1813    Options Database Keys:
1814 $    -mat_no_unroll - uses code that does not unroll the loops in the
1815 $                     block calculations (much solver)
1816 $    -mat_block_size - size of the blocks to use
1817 
1818    Notes:
1819    The block AIJ format is fully compatible with standard Fortran 77
1820    storage.  That is, the stored row and column indices can begin at
1821    either one (as in Fortran) or zero.  See the users' manual for details.
1822 
1823    Specify the preallocated storage with either nz or nnz (not both).
1824    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1825    allocation.  For additional details, see the users manual chapter on
1826    matrices.
1827 
1828 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1829 @*/
1830 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1831 {
1832   Mat         B;
1833   Mat_SeqBAIJ *b;
1834   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
1835 
1836   MPI_Comm_size(comm,&size);
1837   if (size > 1) SETERRQ(1,0,"Comm must be of size 1");
1838 
1839   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1840 
1841   if (mbs*bs!=m || nbs*bs!=n)
1842     SETERRQ(1,0,"Number rows, cols must be divisible by blocksize");
1843 
1844   *A = 0;
1845   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1846   PLogObjectCreate(B);
1847   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1848   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1849   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1850   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1851   if (!flg) {
1852     switch (bs) {
1853       case 1:
1854         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1855         B->ops.solve           = MatSolve_SeqBAIJ_1;
1856         B->ops.mult            = MatMult_SeqBAIJ_1;
1857         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
1858        break;
1859       case 2:
1860         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1861         B->ops.solve           = MatSolve_SeqBAIJ_2;
1862         B->ops.mult            = MatMult_SeqBAIJ_2;
1863         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
1864         break;
1865       case 3:
1866         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1867         B->ops.solve           = MatSolve_SeqBAIJ_3;
1868         B->ops.mult            = MatMult_SeqBAIJ_3;
1869         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
1870         break;
1871       case 4:
1872         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1873         B->ops.solve           = MatSolve_SeqBAIJ_4;
1874         B->ops.mult            = MatMult_SeqBAIJ_4;
1875         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
1876         break;
1877       case 5:
1878         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1879         B->ops.solve           = MatSolve_SeqBAIJ_5;
1880         B->ops.mult            = MatMult_SeqBAIJ_5;
1881         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
1882         break;
1883     }
1884   }
1885   B->destroy          = MatDestroy_SeqBAIJ;
1886   B->view             = MatView_SeqBAIJ;
1887   B->factor           = 0;
1888   B->lupivotthreshold = 1.0;
1889   B->mapping          = 0;
1890   b->row              = 0;
1891   b->col              = 0;
1892   b->reallocs         = 0;
1893 
1894   b->m       = m; B->m = m; B->M = m;
1895   b->n       = n; B->n = n; B->N = n;
1896   b->mbs     = mbs;
1897   b->nbs     = nbs;
1898   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1899   if (nnz == PETSC_NULL) {
1900     if (nz == PETSC_DEFAULT) nz = 5;
1901     else if (nz <= 0)        nz = 1;
1902     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1903     nz = nz*mbs;
1904   }
1905   else {
1906     nz = 0;
1907     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1908   }
1909 
1910   /* allocate the matrix space */
1911   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1912   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1913   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1914   b->j  = (int *) (b->a + nz*bs2);
1915   PetscMemzero(b->j,nz*sizeof(int));
1916   b->i  = b->j + nz;
1917   b->singlemalloc = 1;
1918 
1919   b->i[0] = 0;
1920   for (i=1; i<mbs+1; i++) {
1921     b->i[i] = b->i[i-1] + b->imax[i-1];
1922   }
1923 
1924   /* b->ilen will count nonzeros in each block row so far. */
1925   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1926   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1927   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1928 
1929   b->bs               = bs;
1930   b->bs2              = bs2;
1931   b->mbs              = mbs;
1932   b->nz               = 0;
1933   b->maxnz            = nz*bs2;
1934   b->sorted           = 0;
1935   b->roworiented      = 1;
1936   b->nonew            = 0;
1937   b->diag             = 0;
1938   b->solve_work       = 0;
1939   b->mult_work        = 0;
1940   b->spptr            = 0;
1941   B->info.nz_unneeded = (double)b->maxnz;
1942 
1943   *A = B;
1944   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1945   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1946   return 0;
1947 }
1948 
1949 #undef __FUNC__
1950 #define __FUNC__ "MatConvertSameType_SeqBAIJ"
1951 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1952 {
1953   Mat         C;
1954   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1955   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1956 
1957   if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix");
1958 
1959   *B = 0;
1960   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1961   PLogObjectCreate(C);
1962   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1963   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1964   C->destroy    = MatDestroy_SeqBAIJ;
1965   C->view       = MatView_SeqBAIJ;
1966   C->factor     = A->factor;
1967   c->row        = 0;
1968   c->col        = 0;
1969   C->assembled  = PETSC_TRUE;
1970 
1971   c->m = C->m   = a->m;
1972   c->n = C->n   = a->n;
1973   C->M          = a->m;
1974   C->N          = a->n;
1975 
1976   c->bs         = a->bs;
1977   c->bs2        = a->bs2;
1978   c->mbs        = a->mbs;
1979   c->nbs        = a->nbs;
1980 
1981   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1982   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1983   for ( i=0; i<mbs; i++ ) {
1984     c->imax[i] = a->imax[i];
1985     c->ilen[i] = a->ilen[i];
1986   }
1987 
1988   /* allocate the matrix space */
1989   c->singlemalloc = 1;
1990   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1991   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1992   c->j  = (int *) (c->a + nz*bs2);
1993   c->i  = c->j + nz;
1994   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1995   if (mbs > 0) {
1996     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1997     if (cpvalues == COPY_VALUES) {
1998       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1999     }
2000   }
2001 
2002   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
2003   c->sorted      = a->sorted;
2004   c->roworiented = a->roworiented;
2005   c->nonew       = a->nonew;
2006 
2007   if (a->diag) {
2008     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2009     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2010     for ( i=0; i<mbs; i++ ) {
2011       c->diag[i] = a->diag[i];
2012     }
2013   }
2014   else c->diag          = 0;
2015   c->nz                 = a->nz;
2016   c->maxnz              = a->maxnz;
2017   c->solve_work         = 0;
2018   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
2019   c->mult_work          = 0;
2020   *B = C;
2021   return 0;
2022 }
2023 
2024 #undef __FUNC__
2025 #define __FUNC__ "MatLoad_SeqBAIJ"
2026 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
2027 {
2028   Mat_SeqBAIJ  *a;
2029   Mat          B;
2030   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2031   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2032   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2033   int          *masked, nmask,tmp,bs2,ishift;
2034   Scalar       *aa;
2035   MPI_Comm     comm = ((PetscObject) viewer)->comm;
2036 
2037   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2038   bs2  = bs*bs;
2039 
2040   MPI_Comm_size(comm,&size);
2041   if (size > 1) SETERRQ(1,0,"view must have one processor");
2042   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2043   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
2044   if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object");
2045   M = header[1]; N = header[2]; nz = header[3];
2046 
2047   if (M != N) SETERRQ(1,0,"Can only do square matrices");
2048 
2049   /*
2050      This code adds extra rows to make sure the number of rows is
2051     divisible by the blocksize
2052   */
2053   mbs        = M/bs;
2054   extra_rows = bs - M + bs*(mbs);
2055   if (extra_rows == bs) extra_rows = 0;
2056   else                  mbs++;
2057   if (extra_rows) {
2058     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2059   }
2060 
2061   /* read in row lengths */
2062   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2063   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
2064   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2065 
2066   /* read in column indices */
2067   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
2068   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
2069   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2070 
2071   /* loop over row lengths determining block row lengths */
2072   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2073   PetscMemzero(browlengths,mbs*sizeof(int));
2074   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
2075   PetscMemzero(mask,mbs*sizeof(int));
2076   masked = mask + mbs;
2077   rowcount = 0; nzcount = 0;
2078   for ( i=0; i<mbs; i++ ) {
2079     nmask = 0;
2080     for ( j=0; j<bs; j++ ) {
2081       kmax = rowlengths[rowcount];
2082       for ( k=0; k<kmax; k++ ) {
2083         tmp = jj[nzcount++]/bs;
2084         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2085       }
2086       rowcount++;
2087     }
2088     browlengths[i] += nmask;
2089     /* zero out the mask elements we set */
2090     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2091   }
2092 
2093   /* create our matrix */
2094   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
2095          CHKERRQ(ierr);
2096   B = *A;
2097   a = (Mat_SeqBAIJ *) B->data;
2098 
2099   /* set matrix "i" values */
2100   a->i[0] = 0;
2101   for ( i=1; i<= mbs; i++ ) {
2102     a->i[i]      = a->i[i-1] + browlengths[i-1];
2103     a->ilen[i-1] = browlengths[i-1];
2104   }
2105   a->nz         = 0;
2106   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
2107 
2108   /* read in nonzero values */
2109   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
2110   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
2111   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2112 
2113   /* set "a" and "j" values into matrix */
2114   nzcount = 0; jcount = 0;
2115   for ( i=0; i<mbs; i++ ) {
2116     nzcountb = nzcount;
2117     nmask    = 0;
2118     for ( j=0; j<bs; j++ ) {
2119       kmax = rowlengths[i*bs+j];
2120       for ( k=0; k<kmax; k++ ) {
2121         tmp = jj[nzcount++]/bs;
2122 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2123       }
2124       rowcount++;
2125     }
2126     /* sort the masked values */
2127     PetscSortInt(nmask,masked);
2128 
2129     /* set "j" values into matrix */
2130     maskcount = 1;
2131     for ( j=0; j<nmask; j++ ) {
2132       a->j[jcount++]  = masked[j];
2133       mask[masked[j]] = maskcount++;
2134     }
2135     /* set "a" values into matrix */
2136     ishift = bs2*a->i[i];
2137     for ( j=0; j<bs; j++ ) {
2138       kmax = rowlengths[i*bs+j];
2139       for ( k=0; k<kmax; k++ ) {
2140         tmp    = jj[nzcountb]/bs ;
2141         block  = mask[tmp] - 1;
2142         point  = jj[nzcountb] - bs*tmp;
2143         idx    = ishift + bs2*block + j + bs*point;
2144         a->a[idx] = aa[nzcountb++];
2145       }
2146     }
2147     /* zero out the mask elements we set */
2148     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2149   }
2150   if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix");
2151 
2152   PetscFree(rowlengths);
2153   PetscFree(browlengths);
2154   PetscFree(aa);
2155   PetscFree(jj);
2156   PetscFree(mask);
2157 
2158   B->assembled = PETSC_TRUE;
2159 
2160   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2161   if (flg) {
2162     Viewer tviewer;
2163     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2164     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
2165     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2166     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2167   }
2168   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2169   if (flg) {
2170     Viewer tviewer;
2171     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2172     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
2173     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2174     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2175   }
2176   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2177   if (flg) {
2178     Viewer tviewer;
2179     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2180     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2181     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2182   }
2183   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2184   if (flg) {
2185     Viewer tviewer;
2186     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2187     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
2188     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2189     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2190   }
2191   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2192   if (flg) {
2193     Viewer tviewer;
2194     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
2195     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
2196     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
2197     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2198   }
2199   return 0;
2200 }
2201 
2202 
2203 
2204