xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 63c41f6a1560bbb6cf7ee09697a660f5641fb9ab)
1 #ifndef lint
2 static char vcid[] = "$Id: baij.c,v 1.78 1996/12/02 21:31:10 bsmith Exp balay $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "src/mat/impls/baij/seq/baij.h"
10 #include "src/vec/vecimpl.h"
11 #include "src/inline/spops.h"
12 #include "petsc.h"
13 
14 
15 /*
16      Adds diagonal pointers to sparse matrix structure.
17 */
18 
19 #undef __FUNCTION__
20 #define __FUNCTION__ "MatMarkDiag_SeqBAIJ"
21 int MatMarkDiag_SeqBAIJ(Mat A)
22 {
23   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
24   int         i,j, *diag, m = a->mbs;
25 
26   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
27   PLogObjectMemory(A,(m+1)*sizeof(int));
28   for ( i=0; i<m; i++ ) {
29     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
30       if (a->j[j] == i) {
31         diag[i] = j;
32         break;
33       }
34     }
35   }
36   a->diag = diag;
37   return 0;
38 }
39 
40 #include "draw.h"
41 #include "pinclude/pviewer.h"
42 #include "sys.h"
43 
44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
45 
46 #undef __FUNCTION__
47 #define __FUNCTION__ "MatGetRowIJ_SeqBAIJ"
48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
49                             PetscTruth *done)
50 {
51   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
52   int         ierr, n = a->mbs,i;
53 
54   *nn = n;
55   if (!ia) return 0;
56   if (symmetric) {
57     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
58   } else if (oshift == 1) {
59     /* temporarily add 1 to i and j indices */
60     int nz = a->i[n] + 1;
61     for ( i=0; i<nz; i++ ) a->j[i]++;
62     for ( i=0; i<n+1; i++ ) a->i[i]++;
63     *ia = a->i; *ja = a->j;
64   } else {
65     *ia = a->i; *ja = a->j;
66   }
67 
68   return 0;
69 }
70 
71 #undef __FUNCTION__
72 #define __FUNCTION__ "MatRestoreRowIJ_SeqBAIJ"
73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
74                                 PetscTruth *done)
75 {
76   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
77   int         i,n = a->mbs;
78 
79   if (!ia) return 0;
80   if (symmetric) {
81     PetscFree(*ia);
82     PetscFree(*ja);
83   } else if (oshift == 1) {
84     int nz = a->i[n];
85     for ( i=0; i<nz; i++ ) a->j[i]--;
86     for ( i=0; i<n+1; i++ ) a->i[i]--;
87   }
88   return 0;
89 }
90 
91 
92 #undef __FUNCTION__
93 #define __FUNCTION__ "MatView_SeqBAIJ_Binary"
94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
95 {
96   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
97   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
98   Scalar      *aa;
99 
100   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
101   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
102   col_lens[0] = MAT_COOKIE;
103 
104   col_lens[1] = a->m;
105   col_lens[2] = a->n;
106   col_lens[3] = a->nz*bs2;
107 
108   /* store lengths of each row and write (including header) to file */
109   count = 0;
110   for ( i=0; i<a->mbs; i++ ) {
111     for ( j=0; j<bs; j++ ) {
112       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
113     }
114   }
115   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
116   PetscFree(col_lens);
117 
118   /* store column indices (zero start index) */
119   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
120   count = 0;
121   for ( i=0; i<a->mbs; i++ ) {
122     for ( j=0; j<bs; j++ ) {
123       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
124         for ( l=0; l<bs; l++ ) {
125           jj[count++] = bs*a->j[k] + l;
126         }
127       }
128     }
129   }
130   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
131   PetscFree(jj);
132 
133   /* store nonzero values */
134   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
135   count = 0;
136   for ( i=0; i<a->mbs; i++ ) {
137     for ( j=0; j<bs; j++ ) {
138       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
139         for ( l=0; l<bs; l++ ) {
140           aa[count++] = a->a[bs2*k + l*bs + j];
141         }
142       }
143     }
144   }
145   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
146   PetscFree(aa);
147   return 0;
148 }
149 
150 #undef __FUNCTION__
151 #define __FUNCTION__ "MatView_SeqBAIJ_ASCII"
152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
153 {
154   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
155   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
156   FILE        *fd;
157   char        *outputname;
158 
159   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
160   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
161   ierr = ViewerGetFormat(viewer,&format);
162   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
163     fprintf(fd,"  block size is %d\n",bs);
164   }
165   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
166     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
167   }
168   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
169     for ( i=0; i<a->mbs; i++ ) {
170       for ( j=0; j<bs; j++ ) {
171         fprintf(fd,"row %d:",i*bs+j);
172         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
173           for ( l=0; l<bs; l++ ) {
174 #if defined(PETSC_COMPLEX)
175           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
176             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
177               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
178           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
179             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
180 #else
181           if (a->a[bs2*k + l*bs + j] != 0.0)
182             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
183 #endif
184           }
185         }
186         fprintf(fd,"\n");
187       }
188     }
189   }
190   else {
191     for ( i=0; i<a->mbs; i++ ) {
192       for ( j=0; j<bs; j++ ) {
193         fprintf(fd,"row %d:",i*bs+j);
194         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
195           for ( l=0; l<bs; l++ ) {
196 #if defined(PETSC_COMPLEX)
197           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
198             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
199               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
200           }
201           else {
202             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
203           }
204 #else
205             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
206 #endif
207           }
208         }
209         fprintf(fd,"\n");
210       }
211     }
212   }
213   fflush(fd);
214   return 0;
215 }
216 
217 #undef __FUNCTION__
218 #define __FUNCTION__ "MatView_SeqBAIJ_Draw"
219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
220 {
221   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
222   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
223   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
224   Scalar       *aa;
225   Draw         draw;
226   DrawButton   button;
227   PetscTruth   isnull;
228 
229   ViewerDrawGetDraw(viewer,&draw);
230   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
231 
232   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
233   xr += w;    yr += h;  xl = -w;     yl = -h;
234   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
235   /* loop over matrix elements drawing boxes */
236   color = DRAW_BLUE;
237   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
238     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
239       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
240       x_l = a->j[j]*bs; x_r = x_l + 1.0;
241       aa = a->a + j*bs2;
242       for ( k=0; k<bs; k++ ) {
243         for ( l=0; l<bs; l++ ) {
244           if (PetscReal(*aa++) >=  0.) continue;
245           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
246         }
247       }
248     }
249   }
250   color = DRAW_CYAN;
251   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
252     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
253       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
254       x_l = a->j[j]*bs; x_r = x_l + 1.0;
255       aa = a->a + j*bs2;
256       for ( k=0; k<bs; k++ ) {
257         for ( l=0; l<bs; l++ ) {
258           if (PetscReal(*aa++) != 0.) continue;
259           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
260         }
261       }
262     }
263   }
264 
265   color = DRAW_RED;
266   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
267     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
268       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
269       x_l = a->j[j]*bs; x_r = x_l + 1.0;
270       aa = a->a + j*bs2;
271       for ( k=0; k<bs; k++ ) {
272         for ( l=0; l<bs; l++ ) {
273           if (PetscReal(*aa++) <= 0.) continue;
274           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
275         }
276       }
277     }
278   }
279 
280   DrawFlush(draw);
281   DrawGetPause(draw,&pause);
282   if (pause >= 0) { PetscSleep(pause); return 0;}
283 
284   /* allow the matrix to zoom or shrink */
285   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
286   while (button != BUTTON_RIGHT) {
287     DrawClear(draw);
288     if (button == BUTTON_LEFT) scale = .5;
289     else if (button == BUTTON_CENTER) scale = 2.;
290     xl = scale*(xl + w - xc) + xc - w*scale;
291     xr = scale*(xr - w - xc) + xc + w*scale;
292     yl = scale*(yl + h - yc) + yc - h*scale;
293     yr = scale*(yr - h - yc) + yc + h*scale;
294     w *= scale; h *= scale;
295     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
296     color = DRAW_BLUE;
297     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
298       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
299         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
300         x_l = a->j[j]*bs; x_r = x_l + 1.0;
301         aa = a->a + j*bs2;
302         for ( k=0; k<bs; k++ ) {
303           for ( l=0; l<bs; l++ ) {
304             if (PetscReal(*aa++) >=  0.) continue;
305             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
306           }
307         }
308       }
309     }
310     color = DRAW_CYAN;
311     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
312       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
313         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
314         x_l = a->j[j]*bs; x_r = x_l + 1.0;
315         aa = a->a + j*bs2;
316         for ( k=0; k<bs; k++ ) {
317           for ( l=0; l<bs; l++ ) {
318           if (PetscReal(*aa++) != 0.) continue;
319           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
320           }
321         }
322       }
323     }
324 
325     color = DRAW_RED;
326     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
327       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
328         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
329         x_l = a->j[j]*bs; x_r = x_l + 1.0;
330         aa = a->a + j*bs2;
331         for ( k=0; k<bs; k++ ) {
332           for ( l=0; l<bs; l++ ) {
333             if (PetscReal(*aa++) <= 0.) continue;
334             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
335           }
336         }
337       }
338     }
339     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
340   }
341   return 0;
342 }
343 
344 #undef __FUNCTION__
345 #define __FUNCTION__ "MatView_SeqBAIJ"
346 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
347 {
348   Mat         A = (Mat) obj;
349   ViewerType  vtype;
350   int         ierr;
351 
352   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
353   if (vtype == MATLAB_VIEWER) {
354     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
355   }
356   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
357     return MatView_SeqBAIJ_ASCII(A,viewer);
358   }
359   else if (vtype == BINARY_FILE_VIEWER) {
360     return MatView_SeqBAIJ_Binary(A,viewer);
361   }
362   else if (vtype == DRAW_VIEWER) {
363     return MatView_SeqBAIJ_Draw(A,viewer);
364   }
365   return 0;
366 }
367 
368 #define CHUNKSIZE  10
369 
370 #undef __FUNCTION__
371 #define __FUNCTION__ "MatSetValues_SeqBAIJ"
372 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
373 {
374   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
375   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
376   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
377   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
378   int          ridx,cidx,bs2=a->bs2;
379   Scalar      *ap,value,*aa=a->a,*bap;
380 
381   for ( k=0; k<m; k++ ) { /* loop over added rows */
382     row  = im[k]; brow = row/bs;
383 #if defined(PETSC_BOPT_g)
384     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
385     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
386 #endif
387     rp   = aj + ai[brow];
388     ap   = aa + bs2*ai[brow];
389     rmax = imax[brow];
390     nrow = ailen[brow];
391     low  = 0;
392     for ( l=0; l<n; l++ ) { /* loop over added columns */
393 #if defined(PETSC_BOPT_g)
394       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
395       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
396 #endif
397       col = in[l]; bcol = col/bs;
398       ridx = row % bs; cidx = col % bs;
399       if (roworiented) {
400         value = *v++;
401       }
402       else {
403         value = v[k + l*m];
404       }
405       if (!sorted) low = 0; high = nrow;
406       while (high-low > 7) {
407         t = (low+high)/2;
408         if (rp[t] > bcol) high = t;
409         else              low  = t;
410       }
411       for ( i=low; i<high; i++ ) {
412         if (rp[i] > bcol) break;
413         if (rp[i] == bcol) {
414           bap  = ap +  bs2*i + bs*cidx + ridx;
415           if (is == ADD_VALUES) *bap += value;
416           else                  *bap  = value;
417           goto noinsert;
418         }
419       }
420       if (nonew) goto noinsert;
421       if (nrow >= rmax) {
422         /* there is no extra room in row, therefore enlarge */
423         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
424         Scalar *new_a;
425 
426         /* malloc new storage space */
427         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
428         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
429         new_j   = (int *) (new_a + bs2*new_nz);
430         new_i   = new_j + new_nz;
431 
432         /* copy over old data into new slots */
433         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
434         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
435         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
436         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
437         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
438                                                            len*sizeof(int));
439         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
440         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
441         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
442                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
443         /* free up old matrix storage */
444         PetscFree(a->a);
445         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
446         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
447         a->singlemalloc = 1;
448 
449         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
450         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
451         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
452         a->maxnz += bs2*CHUNKSIZE;
453         a->reallocs++;
454         a->nz++;
455       }
456       N = nrow++ - 1;
457       /* shift up all the later entries in this row */
458       for ( ii=N; ii>=i; ii-- ) {
459         rp[ii+1] = rp[ii];
460         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
461       }
462       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
463       rp[i]                      = bcol;
464       ap[bs2*i + bs*cidx + ridx] = value;
465       noinsert:;
466       low = i;
467     }
468     ailen[brow] = nrow;
469   }
470   return 0;
471 }
472 
473 #undef __FUNCTION__
474 #define __FUNCTION__ "MatGetSize_SeqBAIJ"
475 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
476 {
477   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
478   *m = a->m; *n = a->n;
479   return 0;
480 }
481 
482 #undef __FUNCTION__
483 #define __FUNCTION__ "MatGetOwnershipRange_SeqBAIJ"
484 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
485 {
486   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
487   *m = 0; *n = a->m;
488   return 0;
489 }
490 
491 #undef __FUNCTION__
492 #define __FUNCTION__ "MatGetRow_SeqBAIJ"
493 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
494 {
495   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
496   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
497   Scalar      *aa,*v_i,*aa_i;
498 
499   bs  = a->bs;
500   ai  = a->i;
501   aj  = a->j;
502   aa  = a->a;
503   bs2 = a->bs2;
504 
505   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
506 
507   bn  = row/bs;   /* Block number */
508   bp  = row % bs; /* Block Position */
509   M   = ai[bn+1] - ai[bn];
510   *nz = bs*M;
511 
512   if (v) {
513     *v = 0;
514     if (*nz) {
515       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
516       for ( i=0; i<M; i++ ) { /* for each block in the block row */
517         v_i  = *v + i*bs;
518         aa_i = aa + bs2*(ai[bn] + i);
519         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
520       }
521     }
522   }
523 
524   if (idx) {
525     *idx = 0;
526     if (*nz) {
527       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
528       for ( i=0; i<M; i++ ) { /* for each block in the block row */
529         idx_i = *idx + i*bs;
530         itmp  = bs*aj[ai[bn] + i];
531         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
532       }
533     }
534   }
535   return 0;
536 }
537 
538 #undef __FUNCTION__
539 #define __FUNCTION__ "MatRestoreRow_SeqBAIJ"
540 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
541 {
542   if (idx) {if (*idx) PetscFree(*idx);}
543   if (v)   {if (*v)   PetscFree(*v);}
544   return 0;
545 }
546 
547 #undef __FUNCTION__
548 #define __FUNCTION__ "MatTranspose_SeqBAIJ"
549 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
550 {
551   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
552   Mat         C;
553   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
554   int         *rows,*cols,bs2=a->bs2;
555   Scalar      *array=a->a;
556 
557   if (B==PETSC_NULL && mbs!=nbs)
558     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
559   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
560   PetscMemzero(col,(1+nbs)*sizeof(int));
561 
562   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
563   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
564   PetscFree(col);
565   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
566   cols = rows + bs;
567   for ( i=0; i<mbs; i++ ) {
568     cols[0] = i*bs;
569     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
570     len = ai[i+1] - ai[i];
571     for ( j=0; j<len; j++ ) {
572       rows[0] = (*aj++)*bs;
573       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
574       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
575       array += bs2;
576     }
577   }
578   PetscFree(rows);
579 
580   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
581   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
582 
583   if (B != PETSC_NULL) {
584     *B = C;
585   } else {
586     /* This isn't really an in-place transpose */
587     PetscFree(a->a);
588     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
589     if (a->diag) PetscFree(a->diag);
590     if (a->ilen) PetscFree(a->ilen);
591     if (a->imax) PetscFree(a->imax);
592     if (a->solve_work) PetscFree(a->solve_work);
593     PetscFree(a);
594     PetscMemcpy(A,C,sizeof(struct _Mat));
595     PetscHeaderDestroy(C);
596   }
597   return 0;
598 }
599 
600 
601 #undef __FUNCTION__
602 #define __FUNCTION__ "MatAssemblyEnd_SeqBAIJ"
603 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
604 {
605   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
606   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
607   int        m = a->m,*ip, N, *ailen = a->ilen;
608   int        mbs = a->mbs, bs2 = a->bs2;
609   Scalar     *aa = a->a, *ap;
610 
611   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
612 
613   for ( i=1; i<mbs; i++ ) {
614     /* move each row back by the amount of empty slots (fshift) before it*/
615     fshift += imax[i-1] - ailen[i-1];
616     if (fshift) {
617       ip = aj + ai[i]; ap = aa + bs2*ai[i];
618       N = ailen[i];
619       for ( j=0; j<N; j++ ) {
620         ip[j-fshift] = ip[j];
621         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
622       }
623     }
624     ai[i] = ai[i-1] + ailen[i-1];
625   }
626   if (mbs) {
627     fshift += imax[mbs-1] - ailen[mbs-1];
628     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
629   }
630   /* reset ilen and imax for each row */
631   for ( i=0; i<mbs; i++ ) {
632     ailen[i] = imax[i] = ai[i+1] - ai[i];
633   }
634   a->nz = ai[mbs];
635 
636   /* diagonals may have moved, so kill the diagonal pointers */
637   if (fshift && a->diag) {
638     PetscFree(a->diag);
639     PLogObjectMemory(A,-(m+1)*sizeof(int));
640     a->diag = 0;
641   }
642   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
643            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
644   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
645            a->reallocs);
646   a->reallocs          = 0;
647   A->info.nz_unneeded  = (double)fshift*bs2;
648 
649   return 0;
650 }
651 
652 #undef __FUNCTION__
653 #define __FUNCTION__ "MatZeroEntries_SeqBAIJ"
654 static int MatZeroEntries_SeqBAIJ(Mat A)
655 {
656   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
657   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
658   return 0;
659 }
660 
661 #undef __FUNCTION__
662 #define __FUNCTION__ "MatDestroy_SeqBAIJ"
663 int MatDestroy_SeqBAIJ(PetscObject obj)
664 {
665   Mat         A  = (Mat) obj;
666   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
667   int         ierr;
668 
669 #if defined(PETSC_LOG)
670   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
671 #endif
672   PetscFree(a->a);
673   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
674   if (a->diag) PetscFree(a->diag);
675   if (a->ilen) PetscFree(a->ilen);
676   if (a->imax) PetscFree(a->imax);
677   if (a->solve_work) PetscFree(a->solve_work);
678   if (a->mult_work) PetscFree(a->mult_work);
679   PetscFree(a);
680   if (A->mapping) {
681     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
682   }
683   PLogObjectDestroy(A);
684   PetscHeaderDestroy(A);
685   return 0;
686 }
687 
688 #undef __FUNCTION__
689 #define __FUNCTION__ "MatSetOption_SeqBAIJ"
690 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
691 {
692   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
693   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
694   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
695   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
696   else if (op == MAT_COLUMNS_UNSORTED)          a->sorted      = 0;
697   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
698   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
699   else if (op == MAT_ROWS_SORTED ||
700            op == MAT_ROWS_UNSORTED ||
701            op == MAT_SYMMETRIC ||
702            op == MAT_STRUCTURALLY_SYMMETRIC ||
703            op == MAT_YES_NEW_DIAGONALS ||
704            op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES)
705     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
706   else if (op == MAT_NO_NEW_DIAGONALS)
707     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");}
708   else
709     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
710   return 0;
711 }
712 
713 
714 /* -------------------------------------------------------*/
715 /* Should check that shapes of vectors and matrices match */
716 /* -------------------------------------------------------*/
717 #include "pinclude/plapack.h"
718 
719 #undef __FUNCTION__
720 #define __FUNCTION__ "MatMult_SeqBAIJ_1"
721 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
722 {
723   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
724   register Scalar *x,*z,*v,sum;
725   int             mbs=a->mbs,i,*idx,*ii,n;
726 
727   VecGetArray_Fast(xx,x);
728   VecGetArray_Fast(zz,z);
729 
730   idx   = a->j;
731   v     = a->a;
732   ii    = a->i;
733 
734   for ( i=0; i<mbs; i++ ) {
735     n    = ii[1] - ii[0]; ii++;
736     sum  = 0.0;
737     while (n--) sum += *v++ * x[*idx++];
738     z[i] = sum;
739   }
740   VecRestoreArray_Fast(xx,x);
741   VecRestoreArray_Fast(zz,z);
742   PLogFlops(2*a->nz - a->m);
743   return 0;
744 }
745 
746 #undef __FUNCTION__
747 #define __FUNCTION__ "MatMult_SeqBAIJ_2"
748 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
749 {
750   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
751   register Scalar *x,*z,*v,*xb,sum1,sum2;
752   register Scalar x1,x2;
753   int             mbs=a->mbs,i,*idx,*ii;
754   int             j,n;
755 
756   VecGetArray_Fast(xx,x);
757   VecGetArray_Fast(zz,z);
758 
759   idx   = a->j;
760   v     = a->a;
761   ii    = a->i;
762 
763   for ( i=0; i<mbs; i++ ) {
764     n  = ii[1] - ii[0]; ii++;
765     sum1 = 0.0; sum2 = 0.0;
766     for ( j=0; j<n; j++ ) {
767       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
768       sum1 += v[0]*x1 + v[2]*x2;
769       sum2 += v[1]*x1 + v[3]*x2;
770       v += 4;
771     }
772     z[0] = sum1; z[1] = sum2;
773     z += 2;
774   }
775   VecRestoreArray_Fast(xx,x);
776   VecRestoreArray_Fast(zz,z);
777   PLogFlops(4*a->nz - a->m);
778   return 0;
779 }
780 
781 #undef __FUNCTION__
782 #define __FUNCTION__ "MatMult_SeqBAIJ_3"
783 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
784 {
785   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
786   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
787   int             mbs=a->mbs,i,*idx,*ii,j,n;
788 
789   VecGetArray_Fast(xx,x);
790   VecGetArray_Fast(zz,z);
791 
792   idx   = a->j;
793   v     = a->a;
794   ii    = a->i;
795 
796   for ( i=0; i<mbs; i++ ) {
797     n  = ii[1] - ii[0]; ii++;
798     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
799     for ( j=0; j<n; j++ ) {
800       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
801       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
802       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
803       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
804       v += 9;
805     }
806     z[0] = sum1; z[1] = sum2; z[2] = sum3;
807     z += 3;
808   }
809   VecRestoreArray_Fast(xx,x);
810   VecRestoreArray_Fast(zz,z);
811   PLogFlops(18*a->nz - a->m);
812   return 0;
813 }
814 
815 #undef __FUNCTION__
816 #define __FUNCTION__ "MatMult_SeqBAIJ_4"
817 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
818 {
819   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
820   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
821   register Scalar x1,x2,x3,x4;
822   int             mbs=a->mbs,i,*idx,*ii;
823   int             j,n;
824 
825   VecGetArray_Fast(xx,x);
826   VecGetArray_Fast(zz,z);
827 
828   idx   = a->j;
829   v     = a->a;
830   ii    = a->i;
831 
832   for ( i=0; i<mbs; i++ ) {
833     n  = ii[1] - ii[0]; ii++;
834     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
835     for ( j=0; j<n; j++ ) {
836       xb = x + 4*(*idx++);
837       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
838       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
839       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
840       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
841       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
842       v += 16;
843     }
844     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
845     z += 4;
846   }
847   VecRestoreArray_Fast(xx,x);
848   VecRestoreArray_Fast(zz,z);
849   PLogFlops(32*a->nz - a->m);
850   return 0;
851 }
852 
853 #undef __FUNCTION__
854 #define __FUNCTION__ "MatMult_SeqBAIJ_5"
855 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
856 {
857   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
858   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
859   register Scalar x1,x2,x3,x4,x5;
860   int             mbs=a->mbs,i,*idx,*ii,j,n;
861 
862   VecGetArray_Fast(xx,x);
863   VecGetArray_Fast(zz,z);
864 
865   idx   = a->j;
866   v     = a->a;
867   ii    = a->i;
868 
869   for ( i=0; i<mbs; i++ ) {
870     n  = ii[1] - ii[0]; ii++;
871     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
872     for ( j=0; j<n; j++ ) {
873       xb = x + 5*(*idx++);
874       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
875       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
876       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
877       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
878       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
879       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
880       v += 25;
881     }
882     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
883     z += 5;
884   }
885   VecRestoreArray_Fast(xx,x);
886   VecRestoreArray_Fast(zz,z);
887   PLogFlops(50*a->nz - a->m);
888   return 0;
889 }
890 
891 #undef __FUNCTION__
892 #define __FUNCTION__ "MatMult_SeqBAIJ_N"
893 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
894 {
895   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
896   register Scalar *x,*z,*v,*xb;
897   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
898   int             _One = 1,ncols,k;
899   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
900 
901   VecGetArray_Fast(xx,x);
902   VecGetArray_Fast(zz,z);
903 
904   idx   = a->j;
905   v     = a->a;
906   ii    = a->i;
907 
908 
909   if (!a->mult_work) {
910     k = PetscMax(a->m,a->n);
911     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
912   }
913   work = a->mult_work;
914   for ( i=0; i<mbs; i++ ) {
915     n     = ii[1] - ii[0]; ii++;
916     ncols = n*bs;
917     workt = work;
918     for ( j=0; j<n; j++ ) {
919       xb = x + bs*(*idx++);
920       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
921       workt += bs;
922     }
923     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
924     v += n*bs2;
925     z += bs;
926   }
927   VecRestoreArray_Fast(xx,x);
928   VecRestoreArray_Fast(zz,z);
929   PLogFlops(2*a->nz*bs2 - a->m);
930   return 0;
931 }
932 
933 #undef __FUNCTION__
934 #define __FUNCTION__ "MatMultAdd_SeqBAIJ_1"
935 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
936 {
937   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
938   register Scalar *x,*y,*z,*v,sum;
939   int             mbs=a->mbs,i,*idx,*ii,n;
940 
941   VecGetArray_Fast(xx,x);
942   VecGetArray_Fast(yy,y);
943   VecGetArray_Fast(zz,z);
944 
945   idx   = a->j;
946   v     = a->a;
947   ii    = a->i;
948 
949   for ( i=0; i<mbs; i++ ) {
950     n    = ii[1] - ii[0]; ii++;
951     sum  = y[i];
952     while (n--) sum += *v++ * x[*idx++];
953     z[i] = sum;
954   }
955   VecRestoreArray_Fast(xx,x);
956   VecRestoreArray_Fast(yy,y);
957   VecRestoreArray_Fast(zz,z);
958   PLogFlops(2*a->nz);
959   return 0;
960 }
961 
962 #undef __FUNCTION__
963 #define __FUNCTION__ "MatMultAdd_SeqBAIJ_2"
964 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
965 {
966   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
967   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
968   register Scalar x1,x2;
969   int             mbs=a->mbs,i,*idx,*ii;
970   int             j,n;
971 
972   VecGetArray_Fast(xx,x);
973   VecGetArray_Fast(yy,y);
974   VecGetArray_Fast(zz,z);
975 
976   idx   = a->j;
977   v     = a->a;
978   ii    = a->i;
979 
980   for ( i=0; i<mbs; i++ ) {
981     n  = ii[1] - ii[0]; ii++;
982     sum1 = y[0]; sum2 = y[1];
983     for ( j=0; j<n; j++ ) {
984       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
985       sum1 += v[0]*x1 + v[2]*x2;
986       sum2 += v[1]*x1 + v[3]*x2;
987       v += 4;
988     }
989     z[0] = sum1; z[1] = sum2;
990     z += 2; y += 2;
991   }
992   VecRestoreArray_Fast(xx,x);
993   VecRestoreArray_Fast(yy,y);
994   VecRestoreArray_Fast(zz,z);
995   PLogFlops(4*a->nz);
996   return 0;
997 }
998 
999 #undef __FUNCTION__
1000 #define __FUNCTION__ "MatMultAdd_SeqBAIJ_3"
1001 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1002 {
1003   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1004   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1005   int             mbs=a->mbs,i,*idx,*ii,j,n;
1006 
1007   VecGetArray_Fast(xx,x);
1008   VecGetArray_Fast(yy,y);
1009   VecGetArray_Fast(zz,z);
1010 
1011   idx   = a->j;
1012   v     = a->a;
1013   ii    = a->i;
1014 
1015   for ( i=0; i<mbs; i++ ) {
1016     n  = ii[1] - ii[0]; ii++;
1017     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1018     for ( j=0; j<n; j++ ) {
1019       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1020       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1021       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1022       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1023       v += 9;
1024     }
1025     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1026     z += 3; y += 3;
1027   }
1028   VecRestoreArray_Fast(xx,x);
1029   VecRestoreArray_Fast(yy,y);
1030   VecRestoreArray_Fast(zz,z);
1031   PLogFlops(18*a->nz);
1032   return 0;
1033 }
1034 
1035 #undef __FUNCTION__
1036 #define __FUNCTION__ "MatMultAdd_SeqBAIJ_4"
1037 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1038 {
1039   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1040   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
1041   register Scalar x1,x2,x3,x4;
1042   int             mbs=a->mbs,i,*idx,*ii;
1043   int             j,n;
1044 
1045   VecGetArray_Fast(xx,x);
1046   VecGetArray_Fast(yy,y);
1047   VecGetArray_Fast(zz,z);
1048 
1049   idx   = a->j;
1050   v     = a->a;
1051   ii    = a->i;
1052 
1053   for ( i=0; i<mbs; i++ ) {
1054     n  = ii[1] - ii[0]; ii++;
1055     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1056     for ( j=0; j<n; j++ ) {
1057       xb = x + 4*(*idx++);
1058       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1059       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1060       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1061       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1062       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1063       v += 16;
1064     }
1065     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1066     z += 4; y += 4;
1067   }
1068   VecRestoreArray_Fast(xx,x);
1069   VecRestoreArray_Fast(yy,y);
1070   VecRestoreArray_Fast(zz,z);
1071   PLogFlops(32*a->nz);
1072   return 0;
1073 }
1074 
1075 #undef __FUNCTION__
1076 #define __FUNCTION__ "MatMultAdd_SeqBAIJ_5"
1077 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1078 {
1079   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1080   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1081   register Scalar x1,x2,x3,x4,x5;
1082   int             mbs=a->mbs,i,*idx,*ii,j,n;
1083 
1084   VecGetArray_Fast(xx,x);
1085   VecGetArray_Fast(yy,y);
1086   VecGetArray_Fast(zz,z);
1087 
1088   idx   = a->j;
1089   v     = a->a;
1090   ii    = a->i;
1091 
1092   for ( i=0; i<mbs; i++ ) {
1093     n  = ii[1] - ii[0]; ii++;
1094     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1095     for ( j=0; j<n; j++ ) {
1096       xb = x + 5*(*idx++);
1097       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1098       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1099       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1100       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1101       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1102       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1103       v += 25;
1104     }
1105     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1106     z += 5; y += 5;
1107   }
1108   VecRestoreArray_Fast(xx,x);
1109   VecRestoreArray_Fast(yy,y);
1110   VecRestoreArray_Fast(zz,z);
1111   PLogFlops(50*a->nz);
1112   return 0;
1113 }
1114 
1115 #undef __FUNCTION__
1116 #define __FUNCTION__ "MatMultAdd_SeqBAIJ_N"
1117 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1118 {
1119   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1120   register Scalar *x,*z,*v,*xb;
1121   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1122   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1123 
1124   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1125 
1126   VecGetArray_Fast(xx,x);
1127   VecGetArray_Fast(zz,z);
1128 
1129   idx   = a->j;
1130   v     = a->a;
1131   ii    = a->i;
1132 
1133 
1134   if (!a->mult_work) {
1135     k = PetscMax(a->m,a->n);
1136     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1137   }
1138   work = a->mult_work;
1139   for ( i=0; i<mbs; i++ ) {
1140     n     = ii[1] - ii[0]; ii++;
1141     ncols = n*bs;
1142     workt = work;
1143     for ( j=0; j<n; j++ ) {
1144       xb = x + bs*(*idx++);
1145       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1146       workt += bs;
1147     }
1148     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1149     v += n*bs2;
1150     z += bs;
1151   }
1152   VecRestoreArray_Fast(xx,x);
1153   VecRestoreArray_Fast(zz,z);
1154   PLogFlops(2*a->nz*bs2 );
1155   return 0;
1156 }
1157 
1158 #undef __FUNCTION__
1159 #define __FUNCTION__ "MatMultTrans_SeqBAIJ"
1160 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1161 {
1162   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1163   Scalar          *xg,*zg,*zb;
1164   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1165   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1166   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1167 
1168 
1169   VecGetArray_Fast(xx,xg); x = xg;
1170   VecGetArray_Fast(zz,zg); z = zg;
1171   PetscMemzero(z,N*sizeof(Scalar));
1172 
1173   idx   = a->j;
1174   v     = a->a;
1175   ii    = a->i;
1176 
1177   switch (bs) {
1178   case 1:
1179     for ( i=0; i<mbs; i++ ) {
1180       n  = ii[1] - ii[0]; ii++;
1181       xb = x + i; x1 = xb[0];
1182       ib = idx + ai[i];
1183       for ( j=0; j<n; j++ ) {
1184         rval    = ib[j];
1185         z[rval] += *v++ * x1;
1186       }
1187     }
1188     break;
1189   case 2:
1190     for ( i=0; i<mbs; i++ ) {
1191       n  = ii[1] - ii[0]; ii++;
1192       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1193       ib = idx + ai[i];
1194       for ( j=0; j<n; j++ ) {
1195         rval      = ib[j]*2;
1196         z[rval++] += v[0]*x1 + v[1]*x2;
1197         z[rval++] += v[2]*x1 + v[3]*x2;
1198         v += 4;
1199       }
1200     }
1201     break;
1202   case 3:
1203     for ( i=0; i<mbs; i++ ) {
1204       n  = ii[1] - ii[0]; ii++;
1205       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1206       ib = idx + ai[i];
1207       for ( j=0; j<n; j++ ) {
1208         rval      = ib[j]*3;
1209         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1210         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1211         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1212         v += 9;
1213       }
1214     }
1215     break;
1216   case 4:
1217     for ( i=0; i<mbs; i++ ) {
1218       n  = ii[1] - ii[0]; ii++;
1219       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1220       ib = idx + ai[i];
1221       for ( j=0; j<n; j++ ) {
1222         rval      = ib[j]*4;
1223         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1224         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1225         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1226         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1227         v += 16;
1228       }
1229     }
1230     break;
1231   case 5:
1232     for ( i=0; i<mbs; i++ ) {
1233       n  = ii[1] - ii[0]; ii++;
1234       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1235       x4 = xb[3];   x5 = xb[4];
1236       ib = idx + ai[i];
1237       for ( j=0; j<n; j++ ) {
1238         rval      = ib[j]*5;
1239         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1240         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1241         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1242         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1243         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1244         v += 25;
1245       }
1246     }
1247     break;
1248       /* block sizes larger then 5 by 5 are handled by BLAS */
1249     default: {
1250       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1251       if (!a->mult_work) {
1252         k = PetscMax(a->m,a->n);
1253         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1254         CHKPTRQ(a->mult_work);
1255       }
1256       work = a->mult_work;
1257       for ( i=0; i<mbs; i++ ) {
1258         n     = ii[1] - ii[0]; ii++;
1259         ncols = n*bs;
1260         PetscMemzero(work,ncols*sizeof(Scalar));
1261         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1262         v += n*bs2;
1263         x += bs;
1264         workt = work;
1265         for ( j=0; j<n; j++ ) {
1266           zb = z + bs*(*idx++);
1267           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1268           workt += bs;
1269         }
1270       }
1271     }
1272   }
1273   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1274   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1275   PLogFlops(2*a->nz*a->bs2 - a->n);
1276   return 0;
1277 }
1278 
1279 #undef __FUNCTION__
1280 #define __FUNCTION__ "MatMultTransAdd_SeqBAIJ"
1281 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1282 {
1283   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1284   Scalar          *xg,*zg,*zb;
1285   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1286   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1287   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1288 
1289 
1290 
1291   VecGetArray_Fast(xx,xg); x = xg;
1292   VecGetArray_Fast(zz,zg); z = zg;
1293 
1294   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1295   else PetscMemzero(z,N*sizeof(Scalar));
1296 
1297 
1298   idx   = a->j;
1299   v     = a->a;
1300   ii    = a->i;
1301 
1302   switch (bs) {
1303   case 1:
1304     for ( i=0; i<mbs; i++ ) {
1305       n  = ii[1] - ii[0]; ii++;
1306       xb = x + i; x1 = xb[0];
1307       ib = idx + ai[i];
1308       for ( j=0; j<n; j++ ) {
1309         rval    = ib[j];
1310         z[rval] += *v++ * x1;
1311       }
1312     }
1313     break;
1314   case 2:
1315     for ( i=0; i<mbs; i++ ) {
1316       n  = ii[1] - ii[0]; ii++;
1317       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1318       ib = idx + ai[i];
1319       for ( j=0; j<n; j++ ) {
1320         rval      = ib[j]*2;
1321         z[rval++] += v[0]*x1 + v[1]*x2;
1322         z[rval++] += v[2]*x1 + v[3]*x2;
1323         v += 4;
1324       }
1325     }
1326     break;
1327   case 3:
1328     for ( i=0; i<mbs; i++ ) {
1329       n  = ii[1] - ii[0]; ii++;
1330       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1331       ib = idx + ai[i];
1332       for ( j=0; j<n; j++ ) {
1333         rval      = ib[j]*3;
1334         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1335         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1336         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1337         v += 9;
1338       }
1339     }
1340     break;
1341   case 4:
1342     for ( i=0; i<mbs; i++ ) {
1343       n  = ii[1] - ii[0]; ii++;
1344       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1345       ib = idx + ai[i];
1346       for ( j=0; j<n; j++ ) {
1347         rval      = ib[j]*4;
1348         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1349         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1350         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1351         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1352         v += 16;
1353       }
1354     }
1355     break;
1356   case 5:
1357     for ( i=0; i<mbs; i++ ) {
1358       n  = ii[1] - ii[0]; ii++;
1359       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1360       x4 = xb[3];   x5 = xb[4];
1361       ib = idx + ai[i];
1362       for ( j=0; j<n; j++ ) {
1363         rval      = ib[j]*5;
1364         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1365         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1366         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1367         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1368         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1369         v += 25;
1370       }
1371     }
1372     break;
1373       /* block sizes larger then 5 by 5 are handled by BLAS */
1374     default: {
1375       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1376       if (!a->mult_work) {
1377         k = PetscMax(a->m,a->n);
1378         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1379         CHKPTRQ(a->mult_work);
1380       }
1381       work = a->mult_work;
1382       for ( i=0; i<mbs; i++ ) {
1383         n     = ii[1] - ii[0]; ii++;
1384         ncols = n*bs;
1385         PetscMemzero(work,ncols*sizeof(Scalar));
1386         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1387         v += n*bs2;
1388         x += bs;
1389         workt = work;
1390         for ( j=0; j<n; j++ ) {
1391           zb = z + bs*(*idx++);
1392           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1393           workt += bs;
1394         }
1395       }
1396     }
1397   }
1398   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1399   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1400   PLogFlops(2*a->nz*a->bs2);
1401   return 0;
1402 }
1403 
1404 #undef __FUNCTION__
1405 #define __FUNCTION__ "MatGetInfo_SeqBAIJ"
1406 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1407 {
1408   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1409 
1410   info->rows_global    = (double)a->m;
1411   info->columns_global = (double)a->n;
1412   info->rows_local     = (double)a->m;
1413   info->columns_local  = (double)a->n;
1414   info->block_size     = a->bs2;
1415   info->nz_allocated   = a->maxnz;
1416   info->nz_used        = a->bs2*a->nz;
1417   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1418   /*  if (info->nz_unneeded != A->info.nz_unneeded)
1419     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
1420   info->assemblies   = A->num_ass;
1421   info->mallocs      = a->reallocs;
1422   info->memory       = A->mem;
1423   if (A->factor) {
1424     info->fill_ratio_given  = A->info.fill_ratio_given;
1425     info->fill_ratio_needed = A->info.fill_ratio_needed;
1426     info->factor_mallocs    = A->info.factor_mallocs;
1427   } else {
1428     info->fill_ratio_given  = 0;
1429     info->fill_ratio_needed = 0;
1430     info->factor_mallocs    = 0;
1431   }
1432   return 0;
1433 }
1434 
1435 #undef __FUNCTION__
1436 #define __FUNCTION__ "MatEqual_SeqBAIJ"
1437 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1438 {
1439   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1440 
1441   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
1442 
1443   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1444   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1445       (a->nz != b->nz)) {
1446     *flg = PETSC_FALSE; return 0;
1447   }
1448 
1449   /* if the a->i are the same */
1450   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1451     *flg = PETSC_FALSE; return 0;
1452   }
1453 
1454   /* if a->j are the same */
1455   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1456     *flg = PETSC_FALSE; return 0;
1457   }
1458 
1459   /* if a->a are the same */
1460   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1461     *flg = PETSC_FALSE; return 0;
1462   }
1463   *flg = PETSC_TRUE;
1464   return 0;
1465 
1466 }
1467 
1468 #undef __FUNCTION__
1469 #define __FUNCTION__ "MatGetDiagonal_SeqBAIJ"
1470 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1471 {
1472   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1473   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1474   Scalar      *x, zero = 0.0,*aa,*aa_j;
1475 
1476   bs   = a->bs;
1477   aa   = a->a;
1478   ai   = a->i;
1479   aj   = a->j;
1480   ambs = a->mbs;
1481   bs2  = a->bs2;
1482 
1483   VecSet(&zero,v);
1484   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1485   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
1486   for ( i=0; i<ambs; i++ ) {
1487     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1488       if (aj[j] == i) {
1489         row  = i*bs;
1490         aa_j = aa+j*bs2;
1491         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1492         break;
1493       }
1494     }
1495   }
1496   return 0;
1497 }
1498 
1499 #undef __FUNCTION__
1500 #define __FUNCTION__ "MatDiagonalScale_SeqBAIJ"
1501 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1502 {
1503   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1504   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1505   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1506 
1507   ai  = a->i;
1508   aj  = a->j;
1509   aa  = a->a;
1510   m   = a->m;
1511   n   = a->n;
1512   bs  = a->bs;
1513   mbs = a->mbs;
1514   bs2 = a->bs2;
1515   if (ll) {
1516     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1517     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
1518     for ( i=0; i<mbs; i++ ) { /* for each block row */
1519       M  = ai[i+1] - ai[i];
1520       li = l + i*bs;
1521       v  = aa + bs2*ai[i];
1522       for ( j=0; j<M; j++ ) { /* for each block */
1523         for ( k=0; k<bs2; k++ ) {
1524           (*v++) *= li[k%bs];
1525         }
1526       }
1527     }
1528   }
1529 
1530   if (rr) {
1531     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1532     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
1533     for ( i=0; i<mbs; i++ ) { /* for each block row */
1534       M  = ai[i+1] - ai[i];
1535       v  = aa + bs2*ai[i];
1536       for ( j=0; j<M; j++ ) { /* for each block */
1537         ri = r + bs*aj[ai[i]+j];
1538         for ( k=0; k<bs; k++ ) {
1539           x = ri[k];
1540           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1541         }
1542       }
1543     }
1544   }
1545   return 0;
1546 }
1547 
1548 
1549 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1550 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1551 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1552 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1553 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1554 
1555 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1556 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1557 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1558 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1559 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1560 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1561 
1562 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1563 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1564 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1565 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1566 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1567 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1568 
1569 #undef __FUNCTION__
1570 #define __FUNCTION__ "MatNorm_SeqBAIJ"
1571 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1572 {
1573   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1574   Scalar      *v = a->a;
1575   double      sum = 0.0;
1576   int         i,nz=a->nz,bs2=a->bs2;
1577 
1578   if (type == NORM_FROBENIUS) {
1579     for (i=0; i< bs2*nz; i++ ) {
1580 #if defined(PETSC_COMPLEX)
1581       sum += real(conj(*v)*(*v)); v++;
1582 #else
1583       sum += (*v)*(*v); v++;
1584 #endif
1585     }
1586     *norm = sqrt(sum);
1587   }
1588   else {
1589     SETERRQ(PETSC_ERR_SUP,"MatNorm_SeqBAIJ:No support for this norm yet");
1590   }
1591   return 0;
1592 }
1593 
1594 /*
1595      note: This can only work for identity for row and col. It would
1596    be good to check this and otherwise generate an error.
1597 */
1598 #undef __FUNCTION__
1599 #define __FUNCTION__ "MatILUFactor_SeqBAIJ"
1600 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1601 {
1602   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1603   Mat         outA;
1604   int         ierr;
1605 
1606   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
1607 
1608   outA          = inA;
1609   inA->factor   = FACTOR_LU;
1610   a->row        = row;
1611   a->col        = col;
1612 
1613   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1614 
1615   if (!a->diag) {
1616     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1617   }
1618   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1619   return 0;
1620 }
1621 
1622 #undef __FUNCTION__
1623 #define __FUNCTION__ "MatScale_SeqBAIJ"
1624 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1625 {
1626   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1627   int         one = 1, totalnz = a->bs2*a->nz;
1628   BLscal_( &totalnz, alpha, a->a, &one );
1629   PLogFlops(totalnz);
1630   return 0;
1631 }
1632 
1633 #undef __FUNCTION__
1634 #define __FUNCTION__ "MatGetValues_SeqBAIJ"
1635 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1636 {
1637   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1638   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1639   int        *ai = a->i, *ailen = a->ilen;
1640   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1641   Scalar     *ap, *aa = a->a, zero = 0.0;
1642 
1643   for ( k=0; k<m; k++ ) { /* loop over rows */
1644     row  = im[k]; brow = row/bs;
1645     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
1646     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1647     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1648     nrow = ailen[brow];
1649     for ( l=0; l<n; l++ ) { /* loop over columns */
1650       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1651       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1652       col  = in[l] ;
1653       bcol = col/bs;
1654       cidx = col%bs;
1655       ridx = row%bs;
1656       high = nrow;
1657       low  = 0; /* assume unsorted */
1658       while (high-low > 5) {
1659         t = (low+high)/2;
1660         if (rp[t] > bcol) high = t;
1661         else             low  = t;
1662       }
1663       for ( i=low; i<high; i++ ) {
1664         if (rp[i] > bcol) break;
1665         if (rp[i] == bcol) {
1666           *v++ = ap[bs2*i+bs*cidx+ridx];
1667           goto finished;
1668         }
1669       }
1670       *v++ = zero;
1671       finished:;
1672     }
1673   }
1674   return 0;
1675 }
1676 
1677 #undef __FUNCTION__
1678 #define __FUNCTION__ "MatGetBlockSize_SeqBAIJ"
1679 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1680 {
1681   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1682   *bs = baij->bs;
1683   return 0;
1684 }
1685 
1686 /* idx should be of length atlease bs */
1687 #undef __FUNCTION__
1688 #define __FUNCTION__ "MatZeroRows_SeqBAIJ_Check_Block"
1689 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1690 {
1691   int i,row;
1692   row = idx[0];
1693   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1694 
1695   for ( i=1; i<bs; i++ ) {
1696     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1697   }
1698   *flg = PETSC_TRUE;
1699   return 0;
1700 }
1701 
1702 #undef __FUNCTION__
1703 #define __FUNCTION__ "MatZeroRows_SeqBAIJ"
1704 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1705 {
1706   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1707   IS          is_local;
1708   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1709   PetscTruth  flg;
1710   Scalar      zero = 0.0,*aa;
1711 
1712   /* Make a copy of the IS and  sort it */
1713   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1714   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1715   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1716   ierr = ISSort(is_local); CHKERRQ(ierr);
1717   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1718 
1719   i = 0;
1720   while (i < is_n) {
1721     if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range");
1722     flg = PETSC_FALSE;
1723     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1724     if (flg) { /* There exists a block of rows to be Zerowed */
1725       baij->ilen[rows[i]/bs] = 0;
1726       i += bs;
1727     } else { /* Zero out only the requested row */
1728       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1729       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1730       for ( j=0; j<count; j++ ) {
1731         aa[0] = zero;
1732         aa+=bs;
1733       }
1734       i++;
1735     }
1736   }
1737   if (diag) {
1738     for ( j=0; j<is_n; j++ ) {
1739       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1740     }
1741   }
1742   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1743   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1744   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1745   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1746 
1747   return 0;
1748 }
1749 
1750 #undef __FUNCTION__
1751 #define __FUNCTION__ "MatPrintHelp_SeqBAIJ"
1752 int MatPrintHelp_SeqBAIJ(Mat A)
1753 {
1754   static int called = 0;
1755   MPI_Comm   comm = A->comm;
1756 
1757   if (called) return 0; else called = 1;
1758   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1759   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1760   return 0;
1761 }
1762 
1763 /* -------------------------------------------------------------------*/
1764 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1765        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1766        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1767        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1768        MatSolve_SeqBAIJ_N,0,
1769        0,0,
1770        MatLUFactor_SeqBAIJ,0,
1771        0,
1772        MatTranspose_SeqBAIJ,
1773        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1774        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1775        0,MatAssemblyEnd_SeqBAIJ,
1776        0,
1777        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
1778        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1779        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1780        MatILUFactorSymbolic_SeqBAIJ,0,
1781        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1782        MatConvertSameType_SeqBAIJ,0,0,
1783        MatILUFactor_SeqBAIJ,0,0,
1784        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
1785        MatGetValues_SeqBAIJ,0,
1786        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1787        0,0,0,MatGetBlockSize_SeqBAIJ,
1788        MatGetRowIJ_SeqBAIJ,
1789        MatRestoreRowIJ_SeqBAIJ};
1790 
1791 #undef __FUNCTION__
1792 #define __FUNCTION__ "MatCreateSeqBAIJ"
1793 /*@C
1794    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1795    compressed row) format.  For good matrix assembly performance the
1796    user should preallocate the matrix storage by setting the parameter nz
1797    (or the array nzz).  By setting these parameters accurately, performance
1798    during matrix assembly can be increased by more than a factor of 50.
1799 
1800    Input Parameters:
1801 .  comm - MPI communicator, set to MPI_COMM_SELF
1802 .  bs - size of block
1803 .  m - number of rows
1804 .  n - number of columns
1805 .  nz - number of block nonzeros per block row (same for all rows)
1806 .  nzz - array containing the number of block nonzeros in the various block rows
1807          (possibly different for each block row) or PETSC_NULL
1808 
1809    Output Parameter:
1810 .  A - the matrix
1811 
1812    Notes:
1813    The block AIJ format is fully compatible with standard Fortran 77
1814    storage.  That is, the stored row and column indices can begin at
1815    either one (as in Fortran) or zero.  See the users' manual for details.
1816 
1817    Specify the preallocated storage with either nz or nnz (not both).
1818    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1819    allocation.  For additional details, see the users manual chapter on
1820    matrices.
1821 
1822 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1823 @*/
1824 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1825 {
1826   Mat         B;
1827   Mat_SeqBAIJ *b;
1828   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
1829 
1830   MPI_Comm_size(comm,&size);
1831   if (size > 1) SETERRQ(1,"MatCreateSeqBAIJ:Comm must be of size 1");
1832 
1833   if (mbs*bs!=m || nbs*bs!=n)
1834     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1835 
1836   *A = 0;
1837   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1838   PLogObjectCreate(B);
1839   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1840   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1841   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1842   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1843   if (!flg) {
1844     switch (bs) {
1845       case 1:
1846         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1847         B->ops.solve           = MatSolve_SeqBAIJ_1;
1848         B->ops.mult            = MatMult_SeqBAIJ_1;
1849         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
1850        break;
1851       case 2:
1852         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1853         B->ops.solve           = MatSolve_SeqBAIJ_2;
1854         B->ops.mult            = MatMult_SeqBAIJ_2;
1855         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
1856         break;
1857       case 3:
1858         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1859         B->ops.solve           = MatSolve_SeqBAIJ_3;
1860         B->ops.mult            = MatMult_SeqBAIJ_3;
1861         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
1862         break;
1863       case 4:
1864         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1865         B->ops.solve           = MatSolve_SeqBAIJ_4;
1866         B->ops.mult            = MatMult_SeqBAIJ_4;
1867         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
1868         break;
1869       case 5:
1870         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1871         B->ops.solve           = MatSolve_SeqBAIJ_5;
1872         B->ops.mult            = MatMult_SeqBAIJ_5;
1873         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
1874         break;
1875     }
1876   }
1877   B->destroy          = MatDestroy_SeqBAIJ;
1878   B->view             = MatView_SeqBAIJ;
1879   B->factor           = 0;
1880   B->lupivotthreshold = 1.0;
1881   B->mapping          = 0;
1882   b->row              = 0;
1883   b->col              = 0;
1884   b->reallocs         = 0;
1885 
1886   b->m       = m; B->m = m; B->M = m;
1887   b->n       = n; B->n = n; B->N = n;
1888   b->mbs     = mbs;
1889   b->nbs     = nbs;
1890   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1891   if (nnz == PETSC_NULL) {
1892     if (nz == PETSC_DEFAULT) nz = 5;
1893     else if (nz <= 0)        nz = 1;
1894     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1895     nz = nz*mbs;
1896   }
1897   else {
1898     nz = 0;
1899     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1900   }
1901 
1902   /* allocate the matrix space */
1903   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1904   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1905   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1906   b->j  = (int *) (b->a + nz*bs2);
1907   PetscMemzero(b->j,nz*sizeof(int));
1908   b->i  = b->j + nz;
1909   b->singlemalloc = 1;
1910 
1911   b->i[0] = 0;
1912   for (i=1; i<mbs+1; i++) {
1913     b->i[i] = b->i[i-1] + b->imax[i-1];
1914   }
1915 
1916   /* b->ilen will count nonzeros in each block row so far. */
1917   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1918   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1919   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1920 
1921   b->bs               = bs;
1922   b->bs2              = bs2;
1923   b->mbs              = mbs;
1924   b->nz               = 0;
1925   b->maxnz            = nz*bs2;
1926   b->sorted           = 0;
1927   b->roworiented      = 1;
1928   b->nonew            = 0;
1929   b->diag             = 0;
1930   b->solve_work       = 0;
1931   b->mult_work        = 0;
1932   b->spptr            = 0;
1933   B->info.nz_unneeded = (double)b->maxnz;
1934 
1935   *A = B;
1936   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1937   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1938   return 0;
1939 }
1940 
1941 #undef __FUNCTION__
1942 #define __FUNCTION__ "MatConvertSameType_SeqBAIJ"
1943 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1944 {
1945   Mat         C;
1946   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1947   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1948 
1949   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1950 
1951   *B = 0;
1952   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1953   PLogObjectCreate(C);
1954   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1955   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1956   C->destroy    = MatDestroy_SeqBAIJ;
1957   C->view       = MatView_SeqBAIJ;
1958   C->factor     = A->factor;
1959   c->row        = 0;
1960   c->col        = 0;
1961   C->assembled  = PETSC_TRUE;
1962 
1963   c->m = C->m   = a->m;
1964   c->n = C->n   = a->n;
1965   C->M          = a->m;
1966   C->N          = a->n;
1967 
1968   c->bs         = a->bs;
1969   c->bs2        = a->bs2;
1970   c->mbs        = a->mbs;
1971   c->nbs        = a->nbs;
1972 
1973   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1974   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1975   for ( i=0; i<mbs; i++ ) {
1976     c->imax[i] = a->imax[i];
1977     c->ilen[i] = a->ilen[i];
1978   }
1979 
1980   /* allocate the matrix space */
1981   c->singlemalloc = 1;
1982   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1983   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1984   c->j  = (int *) (c->a + nz*bs2);
1985   c->i  = c->j + nz;
1986   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1987   if (mbs > 0) {
1988     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1989     if (cpvalues == COPY_VALUES) {
1990       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1991     }
1992   }
1993 
1994   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1995   c->sorted      = a->sorted;
1996   c->roworiented = a->roworiented;
1997   c->nonew       = a->nonew;
1998 
1999   if (a->diag) {
2000     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2001     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2002     for ( i=0; i<mbs; i++ ) {
2003       c->diag[i] = a->diag[i];
2004     }
2005   }
2006   else c->diag          = 0;
2007   c->nz                 = a->nz;
2008   c->maxnz              = a->maxnz;
2009   c->solve_work         = 0;
2010   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
2011   c->mult_work          = 0;
2012   *B = C;
2013   return 0;
2014 }
2015 
2016 #undef __FUNCTION__
2017 #define __FUNCTION__ "MatLoad_SeqBAIJ"
2018 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
2019 {
2020   Mat_SeqBAIJ  *a;
2021   Mat          B;
2022   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2023   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2024   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2025   int          *masked, nmask,tmp,bs2,ishift;
2026   Scalar       *aa;
2027   MPI_Comm     comm = ((PetscObject) viewer)->comm;
2028 
2029   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2030   bs2  = bs*bs;
2031 
2032   MPI_Comm_size(comm,&size);
2033   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
2034   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2035   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
2036   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
2037   M = header[1]; N = header[2]; nz = header[3];
2038 
2039   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
2040 
2041   /*
2042      This code adds extra rows to make sure the number of rows is
2043     divisible by the blocksize
2044   */
2045   mbs        = M/bs;
2046   extra_rows = bs - M + bs*(mbs);
2047   if (extra_rows == bs) extra_rows = 0;
2048   else                  mbs++;
2049   if (extra_rows) {
2050     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2051   }
2052 
2053   /* read in row lengths */
2054   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2055   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
2056   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2057 
2058   /* read in column indices */
2059   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
2060   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
2061   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2062 
2063   /* loop over row lengths determining block row lengths */
2064   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2065   PetscMemzero(browlengths,mbs*sizeof(int));
2066   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
2067   PetscMemzero(mask,mbs*sizeof(int));
2068   masked = mask + mbs;
2069   rowcount = 0; nzcount = 0;
2070   for ( i=0; i<mbs; i++ ) {
2071     nmask = 0;
2072     for ( j=0; j<bs; j++ ) {
2073       kmax = rowlengths[rowcount];
2074       for ( k=0; k<kmax; k++ ) {
2075         tmp = jj[nzcount++]/bs;
2076         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2077       }
2078       rowcount++;
2079     }
2080     browlengths[i] += nmask;
2081     /* zero out the mask elements we set */
2082     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2083   }
2084 
2085   /* create our matrix */
2086   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
2087          CHKERRQ(ierr);
2088   B = *A;
2089   a = (Mat_SeqBAIJ *) B->data;
2090 
2091   /* set matrix "i" values */
2092   a->i[0] = 0;
2093   for ( i=1; i<= mbs; i++ ) {
2094     a->i[i]      = a->i[i-1] + browlengths[i-1];
2095     a->ilen[i-1] = browlengths[i-1];
2096   }
2097   a->nz         = 0;
2098   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
2099 
2100   /* read in nonzero values */
2101   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
2102   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
2103   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2104 
2105   /* set "a" and "j" values into matrix */
2106   nzcount = 0; jcount = 0;
2107   for ( i=0; i<mbs; i++ ) {
2108     nzcountb = nzcount;
2109     nmask    = 0;
2110     for ( j=0; j<bs; j++ ) {
2111       kmax = rowlengths[i*bs+j];
2112       for ( k=0; k<kmax; k++ ) {
2113         tmp = jj[nzcount++]/bs;
2114 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2115       }
2116       rowcount++;
2117     }
2118     /* sort the masked values */
2119     PetscSortInt(nmask,masked);
2120 
2121     /* set "j" values into matrix */
2122     maskcount = 1;
2123     for ( j=0; j<nmask; j++ ) {
2124       a->j[jcount++]  = masked[j];
2125       mask[masked[j]] = maskcount++;
2126     }
2127     /* set "a" values into matrix */
2128     ishift = bs2*a->i[i];
2129     for ( j=0; j<bs; j++ ) {
2130       kmax = rowlengths[i*bs+j];
2131       for ( k=0; k<kmax; k++ ) {
2132         tmp    = jj[nzcountb]/bs ;
2133         block  = mask[tmp] - 1;
2134         point  = jj[nzcountb] - bs*tmp;
2135         idx    = ishift + bs2*block + j + bs*point;
2136         a->a[idx] = aa[nzcountb++];
2137       }
2138     }
2139     /* zero out the mask elements we set */
2140     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2141   }
2142   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
2143 
2144   PetscFree(rowlengths);
2145   PetscFree(browlengths);
2146   PetscFree(aa);
2147   PetscFree(jj);
2148   PetscFree(mask);
2149 
2150   B->assembled = PETSC_TRUE;
2151 
2152   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2153   if (flg) {
2154     Viewer tviewer;
2155     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2156     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
2157     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2158     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2159   }
2160   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2161   if (flg) {
2162     Viewer tviewer;
2163     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2164     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
2165     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2166     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2167   }
2168   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2169   if (flg) {
2170     Viewer tviewer;
2171     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2172     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2173     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2174   }
2175   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2176   if (flg) {
2177     Viewer tviewer;
2178     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2179     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
2180     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2181     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2182   }
2183   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2184   if (flg) {
2185     Viewer tviewer;
2186     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
2187     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
2188     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
2189     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2190   }
2191   return 0;
2192 }
2193 
2194 
2195 
2196