xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 6a67db7e1512b5d9ff9ba8f94e48d05f0a9eae3d)
1 #ifndef lint
2 static char vcid[] = "$Id: baij.c,v 1.66 1996/08/15 12:48:03 bsmith Exp curfman $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "src/mat/impls/baij/seq/baij.h"
10 #include "src/vec/vecimpl.h"
11 #include "src/inline/spops.h"
12 #include "petsc.h"
13 
14 extern   int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
15 
16 static int MatGetReordering_SeqBAIJ(Mat A,MatReordering type,IS *rperm,IS *cperm)
17 {
18   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
19   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
20 
21   /*
22      this is tacky: In the future when we have written special factorization
23      and solve routines for the identity permutation we should use a
24      stride index set instead of the general one.
25   */
26   if (type  == ORDER_NATURAL) {
27     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
28     for ( i=0; i<n; i++ ) idx[i] = i;
29     ierr = ISCreateGeneral(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
30     ierr = ISCreateGeneral(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
31     PetscFree(idx);
32     ISSetPermutation(*rperm);
33     ISSetPermutation(*cperm);
34     ISSetIdentity(*rperm);
35     ISSetIdentity(*cperm);
36     return 0;
37   }
38 
39   MatReorderingRegisterAll();
40   ishift = 0;
41   oshift = -MatReorderingIndexShift[(int)type];
42   if (MatReorderingRequiresSymmetric[(int)type]) {
43     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr);
44     ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
45     PetscFree(ia); PetscFree(ja);
46   } else {
47     if (ishift == oshift) {
48       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
49     }
50     else if (ishift == -1) {
51       /* temporarily subtract 1 from i and j indices */
52       int nz = a->i[n] - 1;
53       for ( i=0; i<nz; i++ ) a->j[i]--;
54       for ( i=0; i<n+1; i++ ) a->i[i]--;
55       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
56       for ( i=0; i<nz; i++ ) a->j[i]++;
57       for ( i=0; i<n+1; i++ ) a->i[i]++;
58     } else {
59       /* temporarily add 1 to i and j indices */
60       int nz = a->i[n] - 1;
61       for ( i=0; i<nz; i++ ) a->j[i]++;
62       for ( i=0; i<n+1; i++ ) a->i[i]++;
63       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
64       for ( i=0; i<nz; i++ ) a->j[i]--;
65       for ( i=0; i<n+1; i++ ) a->i[i]--;
66     }
67   }
68   return 0;
69 }
70 
71 /*
72      Adds diagonal pointers to sparse matrix structure.
73 */
74 
75 int MatMarkDiag_SeqBAIJ(Mat A)
76 {
77   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
78   int         i,j, *diag, m = a->mbs;
79 
80   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
81   PLogObjectMemory(A,(m+1)*sizeof(int));
82   for ( i=0; i<m; i++ ) {
83     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
84       if (a->j[j] == i) {
85         diag[i] = j;
86         break;
87       }
88     }
89   }
90   a->diag = diag;
91   return 0;
92 }
93 
94 #include "draw.h"
95 #include "pinclude/pviewer.h"
96 #include "sys.h"
97 
98 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
99 {
100   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
101   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
102   Scalar      *aa;
103 
104   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
105   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
106   col_lens[0] = MAT_COOKIE;
107   col_lens[1] = a->m;
108   col_lens[2] = a->n;
109   col_lens[3] = a->nz*bs2;
110 
111   /* store lengths of each row and write (including header) to file */
112   count = 0;
113   for ( i=0; i<a->mbs; i++ ) {
114     for ( j=0; j<bs; j++ ) {
115       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
116     }
117   }
118   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
119   PetscFree(col_lens);
120 
121   /* store column indices (zero start index) */
122   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
123   count = 0;
124   for ( i=0; i<a->mbs; i++ ) {
125     for ( j=0; j<bs; j++ ) {
126       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
127         for ( l=0; l<bs; l++ ) {
128           jj[count++] = bs*a->j[k] + l;
129         }
130       }
131     }
132   }
133   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
134   PetscFree(jj);
135 
136   /* store nonzero values */
137   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
138   count = 0;
139   for ( i=0; i<a->mbs; i++ ) {
140     for ( j=0; j<bs; j++ ) {
141       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
142         for ( l=0; l<bs; l++ ) {
143           aa[count++] = a->a[bs2*k + l*bs + j];
144         }
145       }
146     }
147   }
148   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
149   PetscFree(aa);
150   return 0;
151 }
152 
153 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
154 {
155   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
156   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
157   FILE        *fd;
158   char        *outputname;
159 
160   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
161   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
162   ierr = ViewerGetFormat(viewer,&format);
163   if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) {
164     fprintf(fd,"  block size is %d\n",bs);
165   }
166   else if (format == ASCII_FORMAT_MATLAB) {
167     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
168   }
169   else if (format == ASCII_FORMAT_COMMON) {
170     for ( i=0; i<a->mbs; i++ ) {
171       for ( j=0; j<bs; j++ ) {
172         fprintf(fd,"row %d:",i*bs+j);
173         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
174           for ( l=0; l<bs; l++ ) {
175 #if defined(PETSC_COMPLEX)
176           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
177             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
178               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
179           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
180             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
181 #else
182           if (a->a[bs2*k + l*bs + j] != 0.0)
183             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
184 #endif
185           }
186         }
187         fprintf(fd,"\n");
188       }
189     }
190   }
191   else {
192     for ( i=0; i<a->mbs; i++ ) {
193       for ( j=0; j<bs; j++ ) {
194         fprintf(fd,"row %d:",i*bs+j);
195         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
196           for ( l=0; l<bs; l++ ) {
197 #if defined(PETSC_COMPLEX)
198           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
199             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
200               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
201           }
202           else {
203             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
204           }
205 #else
206             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
207 #endif
208           }
209         }
210         fprintf(fd,"\n");
211       }
212     }
213   }
214   fflush(fd);
215   return 0;
216 }
217 
218 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
219 {
220   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
221   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
222   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
223   Scalar       *aa;
224   Draw         draw;
225   DrawButton   button;
226   PetscTruth   isnull;
227 
228   ViewerDrawGetDraw(viewer,&draw);
229   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
230 
231   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
232   xr += w;    yr += h;  xl = -w;     yl = -h;
233   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
234   /* loop over matrix elements drawing boxes */
235   color = DRAW_BLUE;
236   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
237     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
238       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
239       x_l = a->j[j]*bs; x_r = x_l + 1.0;
240       aa = a->a + j*bs2;
241       for ( k=0; k<bs; k++ ) {
242         for ( l=0; l<bs; l++ ) {
243           if (PetscReal(*aa++) >=  0.) continue;
244           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
245         }
246       }
247     }
248   }
249   color = DRAW_CYAN;
250   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
251     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
252       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
253       x_l = a->j[j]*bs; x_r = x_l + 1.0;
254       aa = a->a + j*bs2;
255       for ( k=0; k<bs; k++ ) {
256         for ( l=0; l<bs; l++ ) {
257           if (PetscReal(*aa++) != 0.) continue;
258           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
259         }
260       }
261     }
262   }
263 
264   color = DRAW_RED;
265   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
266     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
267       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
268       x_l = a->j[j]*bs; x_r = x_l + 1.0;
269       aa = a->a + j*bs2;
270       for ( k=0; k<bs; k++ ) {
271         for ( l=0; l<bs; l++ ) {
272           if (PetscReal(*aa++) <= 0.) continue;
273           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
274         }
275       }
276     }
277   }
278 
279   DrawFlush(draw);
280   DrawGetPause(draw,&pause);
281   if (pause >= 0) { PetscSleep(pause); return 0;}
282 
283   /* allow the matrix to zoom or shrink */
284   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
285   while (button != BUTTON_RIGHT) {
286     DrawClear(draw);
287     if (button == BUTTON_LEFT) scale = .5;
288     else if (button == BUTTON_CENTER) scale = 2.;
289     xl = scale*(xl + w - xc) + xc - w*scale;
290     xr = scale*(xr - w - xc) + xc + w*scale;
291     yl = scale*(yl + h - yc) + yc - h*scale;
292     yr = scale*(yr - h - yc) + yc + h*scale;
293     w *= scale; h *= scale;
294     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
295     color = DRAW_BLUE;
296     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
297       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
298         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
299         x_l = a->j[j]*bs; x_r = x_l + 1.0;
300         aa = a->a + j*bs2;
301         for ( k=0; k<bs; k++ ) {
302           for ( l=0; l<bs; l++ ) {
303             if (PetscReal(*aa++) >=  0.) continue;
304             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
305           }
306         }
307       }
308     }
309     color = DRAW_CYAN;
310     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
311       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
312         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
313         x_l = a->j[j]*bs; x_r = x_l + 1.0;
314         aa = a->a + j*bs2;
315         for ( k=0; k<bs; k++ ) {
316           for ( l=0; l<bs; l++ ) {
317           if (PetscReal(*aa++) != 0.) continue;
318           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
319           }
320         }
321       }
322     }
323 
324     color = DRAW_RED;
325     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
326       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
327         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
328         x_l = a->j[j]*bs; x_r = x_l + 1.0;
329         aa = a->a + j*bs2;
330         for ( k=0; k<bs; k++ ) {
331           for ( l=0; l<bs; l++ ) {
332             if (PetscReal(*aa++) <= 0.) continue;
333             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
334           }
335         }
336       }
337     }
338     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
339   }
340   return 0;
341 }
342 
343 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
344 {
345   Mat         A = (Mat) obj;
346   ViewerType  vtype;
347   int         ierr;
348 
349   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
350   if (vtype == MATLAB_VIEWER) {
351     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
352   }
353   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
354     return MatView_SeqBAIJ_ASCII(A,viewer);
355   }
356   else if (vtype == BINARY_FILE_VIEWER) {
357     return MatView_SeqBAIJ_Binary(A,viewer);
358   }
359   else if (vtype == DRAW_VIEWER) {
360     return MatView_SeqBAIJ_Draw(A,viewer);
361   }
362   return 0;
363 }
364 
365 #define CHUNKSIZE  10
366 
367 /* This version has row oriented v  */
368 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
369 {
370   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
371   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
372   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
373   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
374   int          ridx,cidx,bs2=a->bs2;
375   Scalar      *ap,value,*aa=a->a,*bap;
376 
377   for ( k=0; k<m; k++ ) { /* loop over added rows */
378     row  = im[k]; brow = row/bs;
379     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
380     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
381     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
382     rmax = imax[brow]; nrow = ailen[brow];
383     low = 0;
384     for ( l=0; l<n; l++ ) { /* loop over added columns */
385       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
386       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
387       col = in[l]; bcol = col/bs;
388       ridx = row % bs; cidx = col % bs;
389       if (roworiented) {
390         value = *v++;
391       }
392       else {
393         value = v[k + l*m];
394       }
395       if (!sorted) low = 0; high = nrow;
396       while (high-low > 5) {
397         t = (low+high)/2;
398         if (rp[t] > bcol) high = t;
399         else             low  = t;
400       }
401       for ( i=low; i<high; i++ ) {
402         if (rp[i] > bcol) break;
403         if (rp[i] == bcol) {
404           bap  = ap +  bs2*i + bs*cidx + ridx;
405           if (is == ADD_VALUES) *bap += value;
406           else                  *bap  = value;
407           goto noinsert;
408         }
409       }
410       if (nonew) goto noinsert;
411       if (nrow >= rmax) {
412         /* there is no extra room in row, therefore enlarge */
413         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
414         Scalar *new_a;
415 
416         /* malloc new storage space */
417         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
418         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
419         new_j   = (int *) (new_a + bs2*new_nz);
420         new_i   = new_j + new_nz;
421 
422         /* copy over old data into new slots */
423         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
424         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
425         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
426         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
427         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
428                                                            len*sizeof(int));
429         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
430         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
431         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
432                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
433         /* free up old matrix storage */
434         PetscFree(a->a);
435         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
436         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
437         a->singlemalloc = 1;
438 
439         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
440         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
441         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
442         a->maxnz += bs2*CHUNKSIZE;
443         a->reallocs++;
444         a->nz++;
445       }
446       N = nrow++ - 1;
447       /* shift up all the later entries in this row */
448       for ( ii=N; ii>=i; ii-- ) {
449         rp[ii+1] = rp[ii];
450          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
451       }
452       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
453       rp[i] = bcol;
454       ap[bs2*i + bs*cidx + ridx] = value;
455       noinsert:;
456       low = i;
457     }
458     ailen[brow] = nrow;
459   }
460   return 0;
461 }
462 
463 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
464 {
465   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
466   *m = a->m; *n = a->n;
467   return 0;
468 }
469 
470 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
471 {
472   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
473   *m = 0; *n = a->m;
474   return 0;
475 }
476 
477 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
478 {
479   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
480   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
481   Scalar      *aa,*v_i,*aa_i;
482 
483   bs  = a->bs;
484   ai  = a->i;
485   aj  = a->j;
486   aa  = a->a;
487   bs2 = a->bs2;
488 
489   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
490 
491   bn  = row/bs;   /* Block number */
492   bp  = row % bs; /* Block Position */
493   M   = ai[bn+1] - ai[bn];
494   *nz = bs*M;
495 
496   if (v) {
497     *v = 0;
498     if (*nz) {
499       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
500       for ( i=0; i<M; i++ ) { /* for each block in the block row */
501         v_i  = *v + i*bs;
502         aa_i = aa + bs2*(ai[bn] + i);
503         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
504       }
505     }
506   }
507 
508   if (idx) {
509     *idx = 0;
510     if (*nz) {
511       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
512       for ( i=0; i<M; i++ ) { /* for each block in the block row */
513         idx_i = *idx + i*bs;
514         itmp  = bs*aj[ai[bn] + i];
515         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
516       }
517     }
518   }
519   return 0;
520 }
521 
522 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
523 {
524   if (idx) {if (*idx) PetscFree(*idx);}
525   if (v)   {if (*v)   PetscFree(*v);}
526   return 0;
527 }
528 
529 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
530 {
531   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
532   Mat         C;
533   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
534   int         *rows,*cols,bs2=a->bs2;
535   Scalar      *array=a->a;
536 
537   if (B==PETSC_NULL && mbs!=nbs)
538     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
539   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
540   PetscMemzero(col,(1+nbs)*sizeof(int));
541 
542   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
543   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
544   PetscFree(col);
545   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
546   cols = rows + bs;
547   for ( i=0; i<mbs; i++ ) {
548     cols[0] = i*bs;
549     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
550     len = ai[i+1] - ai[i];
551     for ( j=0; j<len; j++ ) {
552       rows[0] = (*aj++)*bs;
553       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
554       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
555       array += bs2;
556     }
557   }
558   PetscFree(rows);
559 
560   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
561   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
562 
563   if (B != PETSC_NULL) {
564     *B = C;
565   } else {
566     /* This isn't really an in-place transpose */
567     PetscFree(a->a);
568     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
569     if (a->diag) PetscFree(a->diag);
570     if (a->ilen) PetscFree(a->ilen);
571     if (a->imax) PetscFree(a->imax);
572     if (a->solve_work) PetscFree(a->solve_work);
573     PetscFree(a);
574     PetscMemcpy(A,C,sizeof(struct _Mat));
575     PetscHeaderDestroy(C);
576   }
577   return 0;
578 }
579 
580 
581 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
582 {
583   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
584   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
585   int        m = a->m,*ip, N, *ailen = a->ilen;
586   int        mbs = a->mbs, bs2 = a->bs2;
587   Scalar     *aa = a->a, *ap;
588 
589   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
590 
591   for ( i=1; i<mbs; i++ ) {
592     /* move each row back by the amount of empty slots (fshift) before it*/
593     fshift += imax[i-1] - ailen[i-1];
594     if (fshift) {
595       ip = aj + ai[i]; ap = aa + bs2*ai[i];
596       N = ailen[i];
597       for ( j=0; j<N; j++ ) {
598         ip[j-fshift] = ip[j];
599         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
600       }
601     }
602     ai[i] = ai[i-1] + ailen[i-1];
603   }
604   if (mbs) {
605     fshift += imax[mbs-1] - ailen[mbs-1];
606     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
607   }
608   /* reset ilen and imax for each row */
609   for ( i=0; i<mbs; i++ ) {
610     ailen[i] = imax[i] = ai[i+1] - ai[i];
611   }
612   a->nz = ai[mbs];
613 
614   /* diagonals may have moved, so kill the diagonal pointers */
615   if (fshift && a->diag) {
616     PetscFree(a->diag);
617     PLogObjectMemory(A,-(m+1)*sizeof(int));
618     a->diag = 0;
619   }
620   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneed storage space %d used %d, rows %d, block size %d\n",
621            fshift*bs2,a->nz*bs2,m,a->bs);
622   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n",
623            a->reallocs);
624   A->info.nz_unneeded  = (double)fshift*bs2;
625 
626   return 0;
627 }
628 
629 static int MatZeroEntries_SeqBAIJ(Mat A)
630 {
631   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
632   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
633   return 0;
634 }
635 
636 int MatDestroy_SeqBAIJ(PetscObject obj)
637 {
638   Mat         A  = (Mat) obj;
639   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
640 
641 #if defined(PETSC_LOG)
642   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
643 #endif
644   PetscFree(a->a);
645   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
646   if (a->diag) PetscFree(a->diag);
647   if (a->ilen) PetscFree(a->ilen);
648   if (a->imax) PetscFree(a->imax);
649   if (a->solve_work) PetscFree(a->solve_work);
650   if (a->mult_work) PetscFree(a->mult_work);
651   PetscFree(a);
652   PLogObjectDestroy(A);
653   PetscHeaderDestroy(A);
654   return 0;
655 }
656 
657 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
658 {
659   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
660   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
661   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
662   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
663   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
664   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
665   else if (op == MAT_ROWS_SORTED ||
666            op == MAT_SYMMETRIC ||
667            op == MAT_STRUCTURALLY_SYMMETRIC ||
668            op == MAT_YES_NEW_DIAGONALS)
669     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
670   else if (op == MAT_NO_NEW_DIAGONALS)
671     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");}
672   else
673     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
674   return 0;
675 }
676 
677 
678 /* -------------------------------------------------------*/
679 /* Should check that shapes of vectors and matrices match */
680 /* -------------------------------------------------------*/
681 #include "pinclude/plapack.h"
682 
683 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
684 {
685   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
686   register Scalar *x,*z,*v,sum;
687   int             mbs=a->mbs,i,*idx,*ii,n;
688 
689   VecGetArray_Fast(xx,x);
690   VecGetArray_Fast(zz,z);
691 
692   idx   = a->j;
693   v     = a->a;
694   ii    = a->i;
695 
696   for ( i=0; i<mbs; i++ ) {
697     n    = ii[1] - ii[0]; ii++;
698     sum  = 0.0;
699     while (n--) sum += *v++ * x[*idx++];
700     z[i] = sum;
701   }
702   VecRestoreArray_Fast(xx,x);
703   VecRestoreArray_Fast(zz,z);
704   PLogFlops(2*a->nz - a->m);
705   return 0;
706 }
707 
708 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
709 {
710   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
711   register Scalar *x,*z,*v,*xb,sum1,sum2;
712   register Scalar x1,x2;
713   int             mbs=a->mbs,i,*idx,*ii;
714   int             j,n;
715 
716   VecGetArray_Fast(xx,x);
717   VecGetArray_Fast(zz,z);
718 
719   idx   = a->j;
720   v     = a->a;
721   ii    = a->i;
722 
723   for ( i=0; i<mbs; i++ ) {
724     n  = ii[1] - ii[0]; ii++;
725     sum1 = 0.0; sum2 = 0.0;
726     for ( j=0; j<n; j++ ) {
727       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
728       sum1 += v[0]*x1 + v[2]*x2;
729       sum2 += v[1]*x1 + v[3]*x2;
730       v += 4;
731     }
732     z[0] = sum1; z[1] = sum2;
733     z += 2;
734   }
735   VecRestoreArray_Fast(xx,x);
736   VecRestoreArray_Fast(zz,z);
737   PLogFlops(4*a->nz - a->m);
738   return 0;
739 }
740 
741 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
742 {
743   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
744   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
745   int             mbs=a->mbs,i,*idx,*ii,j,n;
746 
747   VecGetArray_Fast(xx,x);
748   VecGetArray_Fast(zz,z);
749 
750   idx   = a->j;
751   v     = a->a;
752   ii    = a->i;
753 
754   for ( i=0; i<mbs; i++ ) {
755     n  = ii[1] - ii[0]; ii++;
756     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
757     for ( j=0; j<n; j++ ) {
758       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
759       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
760       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
761       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
762       v += 9;
763     }
764     z[0] = sum1; z[1] = sum2; z[2] = sum3;
765     z += 3;
766   }
767   VecRestoreArray_Fast(xx,x);
768   VecRestoreArray_Fast(zz,z);
769   PLogFlops(18*a->nz - a->m);
770   return 0;
771 }
772 
773 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
774 {
775   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
776   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
777   register Scalar x1,x2,x3,x4;
778   int             mbs=a->mbs,i,*idx,*ii;
779   int             j,n;
780 
781   VecGetArray_Fast(xx,x);
782   VecGetArray_Fast(zz,z);
783 
784   idx   = a->j;
785   v     = a->a;
786   ii    = a->i;
787 
788   for ( i=0; i<mbs; i++ ) {
789     n  = ii[1] - ii[0]; ii++;
790     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
791     for ( j=0; j<n; j++ ) {
792       xb = x + 4*(*idx++);
793       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
794       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
795       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
796       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
797       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
798       v += 16;
799     }
800     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
801     z += 4;
802   }
803   VecRestoreArray_Fast(xx,x);
804   VecRestoreArray_Fast(zz,z);
805   PLogFlops(32*a->nz - a->m);
806   return 0;
807 }
808 
809 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
810 {
811   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
812   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
813   register Scalar x1,x2,x3,x4,x5;
814   int             mbs=a->mbs,i,*idx,*ii,j,n;
815 
816   VecGetArray_Fast(xx,x);
817   VecGetArray_Fast(zz,z);
818 
819   idx   = a->j;
820   v     = a->a;
821   ii    = a->i;
822 
823   for ( i=0; i<mbs; i++ ) {
824     n  = ii[1] - ii[0]; ii++;
825     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
826     for ( j=0; j<n; j++ ) {
827       xb = x + 5*(*idx++);
828       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
829       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
830       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
831       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
832       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
833       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
834       v += 25;
835     }
836     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
837     z += 5;
838   }
839   VecRestoreArray_Fast(xx,x);
840   VecRestoreArray_Fast(zz,z);
841   PLogFlops(50*a->nz - a->m);
842   return 0;
843 }
844 
845 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
846 {
847   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
848   register Scalar *x,*z,*v,*xb;
849   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
850   int             _One = 1,ncols,k;
851   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
852 
853   VecGetArray_Fast(xx,x);
854   VecGetArray_Fast(zz,z);
855 
856   idx   = a->j;
857   v     = a->a;
858   ii    = a->i;
859 
860 
861   if (!a->mult_work) {
862     k = PetscMax(a->m,a->n);
863     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
864   }
865   work = a->mult_work;
866   for ( i=0; i<mbs; i++ ) {
867     n     = ii[1] - ii[0]; ii++;
868     ncols = n*bs;
869     workt = work;
870     for ( j=0; j<n; j++ ) {
871       xb = x + bs*(*idx++);
872       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
873       workt += bs;
874     }
875     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
876     v += n*bs2;
877     z += bs;
878   }
879   VecRestoreArray_Fast(xx,x);
880   VecRestoreArray_Fast(zz,z);
881   PLogFlops(2*a->nz*bs2 - a->m);
882   return 0;
883 }
884 
885 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
886 {
887   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
888   register Scalar *x,*y,*z,*v,sum;
889   int             mbs=a->mbs,i,*idx,*ii,n;
890 
891   VecGetArray_Fast(xx,x);
892   VecGetArray_Fast(yy,y);
893   VecGetArray_Fast(zz,z);
894 
895   idx   = a->j;
896   v     = a->a;
897   ii    = a->i;
898 
899   for ( i=0; i<mbs; i++ ) {
900     n    = ii[1] - ii[0]; ii++;
901     sum  = y[i];
902     while (n--) sum += *v++ * x[*idx++];
903     z[i] = sum;
904   }
905   VecRestoreArray_Fast(xx,x);
906   VecRestoreArray_Fast(yy,y);
907   VecRestoreArray_Fast(zz,z);
908   PLogFlops(2*a->nz);
909   return 0;
910 }
911 
912 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
913 {
914   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
915   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
916   register Scalar x1,x2;
917   int             mbs=a->mbs,i,*idx,*ii;
918   int             j,n;
919 
920   VecGetArray_Fast(xx,x);
921   VecGetArray_Fast(yy,y);
922   VecGetArray_Fast(zz,z);
923 
924   idx   = a->j;
925   v     = a->a;
926   ii    = a->i;
927 
928   for ( i=0; i<mbs; i++ ) {
929     n  = ii[1] - ii[0]; ii++;
930     sum1 = y[0]; sum2 = y[1];
931     for ( j=0; j<n; j++ ) {
932       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
933       sum1 += v[0]*x1 + v[2]*x2;
934       sum2 += v[1]*x1 + v[3]*x2;
935       v += 4;
936     }
937     z[0] = sum1; z[1] = sum2;
938     z += 2; y += 2;
939   }
940   VecRestoreArray_Fast(xx,x);
941   VecRestoreArray_Fast(yy,y);
942   VecRestoreArray_Fast(zz,z);
943   PLogFlops(4*a->nz);
944   return 0;
945 }
946 
947 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
948 {
949   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
950   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
951   int             mbs=a->mbs,i,*idx,*ii,j,n;
952 
953   VecGetArray_Fast(xx,x);
954   VecGetArray_Fast(yy,y);
955   VecGetArray_Fast(zz,z);
956 
957   idx   = a->j;
958   v     = a->a;
959   ii    = a->i;
960 
961   for ( i=0; i<mbs; i++ ) {
962     n  = ii[1] - ii[0]; ii++;
963     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
964     for ( j=0; j<n; j++ ) {
965       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
966       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
967       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
968       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
969       v += 9;
970     }
971     z[0] = sum1; z[1] = sum2; z[2] = sum3;
972     z += 3; y += 3;
973   }
974   VecRestoreArray_Fast(xx,x);
975   VecRestoreArray_Fast(yy,y);
976   VecRestoreArray_Fast(zz,z);
977   PLogFlops(18*a->nz);
978   return 0;
979 }
980 
981 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
982 {
983   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
984   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
985   register Scalar x1,x2,x3,x4;
986   int             mbs=a->mbs,i,*idx,*ii;
987   int             j,n;
988 
989   VecGetArray_Fast(xx,x);
990   VecGetArray_Fast(yy,y);
991   VecGetArray_Fast(zz,z);
992 
993   idx   = a->j;
994   v     = a->a;
995   ii    = a->i;
996 
997   for ( i=0; i<mbs; i++ ) {
998     n  = ii[1] - ii[0]; ii++;
999     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1000     for ( j=0; j<n; j++ ) {
1001       xb = x + 4*(*idx++);
1002       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1003       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1004       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1005       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1006       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1007       v += 16;
1008     }
1009     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1010     z += 4; y += 4;
1011   }
1012   VecRestoreArray_Fast(xx,x);
1013   VecRestoreArray_Fast(yy,y);
1014   VecRestoreArray_Fast(zz,z);
1015   PLogFlops(32*a->nz);
1016   return 0;
1017 }
1018 
1019 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1020 {
1021   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1022   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1023   register Scalar x1,x2,x3,x4,x5;
1024   int             mbs=a->mbs,i,*idx,*ii,j,n;
1025 
1026   VecGetArray_Fast(xx,x);
1027   VecGetArray_Fast(yy,y);
1028   VecGetArray_Fast(zz,z);
1029 
1030   idx   = a->j;
1031   v     = a->a;
1032   ii    = a->i;
1033 
1034   for ( i=0; i<mbs; i++ ) {
1035     n  = ii[1] - ii[0]; ii++;
1036     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1037     for ( j=0; j<n; j++ ) {
1038       xb = x + 5*(*idx++);
1039       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1040       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1041       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1042       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1043       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1044       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1045       v += 25;
1046     }
1047     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1048     z += 5; y += 5;
1049   }
1050   VecRestoreArray_Fast(xx,x);
1051   VecRestoreArray_Fast(yy,y);
1052   VecRestoreArray_Fast(zz,z);
1053   PLogFlops(50*a->nz);
1054   return 0;
1055 }
1056 
1057 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1058 {
1059   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1060   register Scalar *x,*z,*v,*xb;
1061   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1062   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1063 
1064   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1065 
1066   VecGetArray_Fast(xx,x);
1067   VecGetArray_Fast(zz,z);
1068 
1069   idx   = a->j;
1070   v     = a->a;
1071   ii    = a->i;
1072 
1073 
1074   if (!a->mult_work) {
1075     k = PetscMax(a->m,a->n);
1076     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1077   }
1078   work = a->mult_work;
1079   for ( i=0; i<mbs; i++ ) {
1080     n     = ii[1] - ii[0]; ii++;
1081     ncols = n*bs;
1082     workt = work;
1083     for ( j=0; j<n; j++ ) {
1084       xb = x + bs*(*idx++);
1085       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1086       workt += bs;
1087     }
1088     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1089     v += n*bs2;
1090     z += bs;
1091   }
1092   VecRestoreArray_Fast(xx,x);
1093   VecRestoreArray_Fast(zz,z);
1094   PLogFlops(2*a->nz*bs2 );
1095   return 0;
1096 }
1097 
1098 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1099 {
1100   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1101   Scalar          *xg,*zg,*zb;
1102   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1103   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1104   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1105 
1106 
1107   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1108   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1109   PetscMemzero(z,N*sizeof(Scalar));
1110 
1111   idx   = a->j;
1112   v     = a->a;
1113   ii    = a->i;
1114 
1115   switch (bs) {
1116   case 1:
1117     for ( i=0; i<mbs; i++ ) {
1118       n  = ii[1] - ii[0]; ii++;
1119       xb = x + i; x1 = xb[0];
1120       ib = idx + ai[i];
1121       for ( j=0; j<n; j++ ) {
1122         rval    = ib[j];
1123         z[rval] += *v++ * x1;
1124       }
1125     }
1126     break;
1127   case 2:
1128     for ( i=0; i<mbs; i++ ) {
1129       n  = ii[1] - ii[0]; ii++;
1130       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1131       ib = idx + ai[i];
1132       for ( j=0; j<n; j++ ) {
1133         rval      = ib[j]*2;
1134         z[rval++] += v[0]*x1 + v[1]*x2;
1135         z[rval++] += v[2]*x1 + v[3]*x2;
1136         v += 4;
1137       }
1138     }
1139     break;
1140   case 3:
1141     for ( i=0; i<mbs; i++ ) {
1142       n  = ii[1] - ii[0]; ii++;
1143       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1144       ib = idx + ai[i];
1145       for ( j=0; j<n; j++ ) {
1146         rval      = ib[j]*3;
1147         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1148         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1149         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1150         v += 9;
1151       }
1152     }
1153     break;
1154   case 4:
1155     for ( i=0; i<mbs; i++ ) {
1156       n  = ii[1] - ii[0]; ii++;
1157       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1158       ib = idx + ai[i];
1159       for ( j=0; j<n; j++ ) {
1160         rval      = ib[j]*4;
1161         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1162         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1163         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1164         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1165         v += 16;
1166       }
1167     }
1168     break;
1169   case 5:
1170     for ( i=0; i<mbs; i++ ) {
1171       n  = ii[1] - ii[0]; ii++;
1172       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1173       x4 = xb[3];   x5 = xb[4];
1174       ib = idx + ai[i];
1175       for ( j=0; j<n; j++ ) {
1176         rval      = ib[j]*5;
1177         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1178         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1179         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1180         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1181         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1182         v += 25;
1183       }
1184     }
1185     break;
1186       /* block sizes larger then 5 by 5 are handled by BLAS */
1187     default: {
1188       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1189       if (!a->mult_work) {
1190         k = PetscMax(a->m,a->n);
1191         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1192         CHKPTRQ(a->mult_work);
1193       }
1194       work = a->mult_work;
1195       for ( i=0; i<mbs; i++ ) {
1196         n     = ii[1] - ii[0]; ii++;
1197         ncols = n*bs;
1198         PetscMemzero(work,ncols*sizeof(Scalar));
1199         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1200         v += n*bs2;
1201         x += bs;
1202         workt = work;
1203         for ( j=0; j<n; j++ ) {
1204           zb = z + bs*(*idx++);
1205           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1206           workt += bs;
1207         }
1208       }
1209     }
1210   }
1211   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1212   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1213   PLogFlops(2*a->nz*a->bs2 - a->n);
1214   return 0;
1215 }
1216 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1217 {
1218   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1219   Scalar          *xg,*zg,*zb;
1220   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1221   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1222   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1223 
1224 
1225 
1226   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1227   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1228 
1229   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1230   else PetscMemzero(z,N*sizeof(Scalar));
1231 
1232 
1233   idx   = a->j;
1234   v     = a->a;
1235   ii    = a->i;
1236 
1237   switch (bs) {
1238   case 1:
1239     for ( i=0; i<mbs; i++ ) {
1240       n  = ii[1] - ii[0]; ii++;
1241       xb = x + i; x1 = xb[0];
1242       ib = idx + ai[i];
1243       for ( j=0; j<n; j++ ) {
1244         rval    = ib[j];
1245         z[rval] += *v++ * x1;
1246       }
1247     }
1248     break;
1249   case 2:
1250     for ( i=0; i<mbs; i++ ) {
1251       n  = ii[1] - ii[0]; ii++;
1252       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1253       ib = idx + ai[i];
1254       for ( j=0; j<n; j++ ) {
1255         rval      = ib[j]*2;
1256         z[rval++] += v[0]*x1 + v[1]*x2;
1257         z[rval++] += v[2]*x1 + v[3]*x2;
1258         v += 4;
1259       }
1260     }
1261     break;
1262   case 3:
1263     for ( i=0; i<mbs; i++ ) {
1264       n  = ii[1] - ii[0]; ii++;
1265       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1266       ib = idx + ai[i];
1267       for ( j=0; j<n; j++ ) {
1268         rval      = ib[j]*3;
1269         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1270         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1271         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1272         v += 9;
1273       }
1274     }
1275     break;
1276   case 4:
1277     for ( i=0; i<mbs; i++ ) {
1278       n  = ii[1] - ii[0]; ii++;
1279       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1280       ib = idx + ai[i];
1281       for ( j=0; j<n; j++ ) {
1282         rval      = ib[j]*4;
1283         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1284         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1285         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1286         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1287         v += 16;
1288       }
1289     }
1290     break;
1291   case 5:
1292     for ( i=0; i<mbs; i++ ) {
1293       n  = ii[1] - ii[0]; ii++;
1294       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1295       x4 = xb[3];   x5 = xb[4];
1296       ib = idx + ai[i];
1297       for ( j=0; j<n; j++ ) {
1298         rval      = ib[j]*5;
1299         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1300         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1301         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1302         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1303         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1304         v += 25;
1305       }
1306     }
1307     break;
1308       /* block sizes larger then 5 by 5 are handled by BLAS */
1309     default: {
1310       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1311       if (!a->mult_work) {
1312         k = PetscMax(a->m,a->n);
1313         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1314         CHKPTRQ(a->mult_work);
1315       }
1316       work = a->mult_work;
1317       for ( i=0; i<mbs; i++ ) {
1318         n     = ii[1] - ii[0]; ii++;
1319         ncols = n*bs;
1320         PetscMemzero(work,ncols*sizeof(Scalar));
1321         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1322         v += n*bs2;
1323         x += bs;
1324         workt = work;
1325         for ( j=0; j<n; j++ ) {
1326           zb = z + bs*(*idx++);
1327           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1328           workt += bs;
1329         }
1330       }
1331     }
1332   }
1333   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1334   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1335   PLogFlops(2*a->nz*a->bs2);
1336   return 0;
1337 }
1338 
1339 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1340 {
1341   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1342 
1343   info->rows_global    = (double)a->m;
1344   info->columns_global = (double)a->n;
1345   info->rows_local     = (double)a->m;
1346   info->columns_local  = (double)a->n;
1347   info->block_size     = a->bs2;
1348   info->nz_allocated   = a->maxnz;
1349   info->nz_used        = a->bs2*a->nz;
1350   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1351   /*  if (info->nz_unneeded != A->info.nz_unneeded)
1352     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
1353   info->assemblies   = A->num_ass;
1354   info->mallocs      = a->reallocs;
1355   info->memory       = A->mem;
1356   if (A->factor) {
1357     info->fill_ratio_given  = A->info.fill_ratio_given;
1358     info->fill_ratio_needed = A->info.fill_ratio_needed;
1359     info->factor_mallocs    = A->info.factor_mallocs;
1360   } else {
1361     info->fill_ratio_given  = 0;
1362     info->fill_ratio_needed = 0;
1363     info->factor_mallocs    = 0;
1364   }
1365   return 0;
1366 }
1367 
1368 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1369 {
1370   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1371 
1372   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
1373 
1374   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1375   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1376       (a->nz != b->nz)) {
1377     *flg = PETSC_FALSE; return 0;
1378   }
1379 
1380   /* if the a->i are the same */
1381   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1382     *flg = PETSC_FALSE; return 0;
1383   }
1384 
1385   /* if a->j are the same */
1386   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1387     *flg = PETSC_FALSE; return 0;
1388   }
1389 
1390   /* if a->a are the same */
1391   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1392     *flg = PETSC_FALSE; return 0;
1393   }
1394   *flg = PETSC_TRUE;
1395   return 0;
1396 
1397 }
1398 
1399 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1400 {
1401   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1402   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1403   Scalar      *x, zero = 0.0,*aa,*aa_j;
1404 
1405   bs   = a->bs;
1406   aa   = a->a;
1407   ai   = a->i;
1408   aj   = a->j;
1409   ambs = a->mbs;
1410   bs2  = a->bs2;
1411 
1412   VecSet(&zero,v);
1413   VecGetArray(v,&x); VecGetLocalSize(v,&n);
1414   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
1415   for ( i=0; i<ambs; i++ ) {
1416     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1417       if (aj[j] == i) {
1418         row  = i*bs;
1419         aa_j = aa+j*bs2;
1420         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1421         break;
1422       }
1423     }
1424   }
1425   return 0;
1426 }
1427 
1428 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1429 {
1430   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1431   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1432   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1433 
1434   ai  = a->i;
1435   aj  = a->j;
1436   aa  = a->a;
1437   m   = a->m;
1438   n   = a->n;
1439   bs  = a->bs;
1440   mbs = a->mbs;
1441   bs2 = a->bs2;
1442   if (ll) {
1443     VecGetArray(ll,&l); VecGetSize(ll,&lm);
1444     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
1445     for ( i=0; i<mbs; i++ ) { /* for each block row */
1446       M  = ai[i+1] - ai[i];
1447       li = l + i*bs;
1448       v  = aa + bs2*ai[i];
1449       for ( j=0; j<M; j++ ) { /* for each block */
1450         for ( k=0; k<bs2; k++ ) {
1451           (*v++) *= li[k%bs];
1452         }
1453       }
1454     }
1455   }
1456 
1457   if (rr) {
1458     VecGetArray(rr,&r); VecGetSize(rr,&rn);
1459     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
1460     for ( i=0; i<mbs; i++ ) { /* for each block row */
1461       M  = ai[i+1] - ai[i];
1462       v  = aa + bs2*ai[i];
1463       for ( j=0; j<M; j++ ) { /* for each block */
1464         ri = r + bs*aj[ai[i]+j];
1465         for ( k=0; k<bs; k++ ) {
1466           x = ri[k];
1467           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1468         }
1469       }
1470     }
1471   }
1472   return 0;
1473 }
1474 
1475 
1476 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1477 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1478 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1479 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1480 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1481 
1482 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1483 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1484 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1485 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1486 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1487 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1488 
1489 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1490 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1491 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1492 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1493 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1494 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1495 
1496 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1497 {
1498   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1499   Scalar      *v = a->a;
1500   double      sum = 0.0;
1501   int         i,nz=a->nz,bs2=a->bs2;
1502 
1503   if (type == NORM_FROBENIUS) {
1504     for (i=0; i< bs2*nz; i++ ) {
1505 #if defined(PETSC_COMPLEX)
1506       sum += real(conj(*v)*(*v)); v++;
1507 #else
1508       sum += (*v)*(*v); v++;
1509 #endif
1510     }
1511     *norm = sqrt(sum);
1512   }
1513   else {
1514     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
1515   }
1516   return 0;
1517 }
1518 
1519 /*
1520      note: This can only work for identity for row and col. It would
1521    be good to check this and otherwise generate an error.
1522 */
1523 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1524 {
1525   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1526   Mat         outA;
1527   int         ierr;
1528 
1529   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
1530 
1531   outA          = inA;
1532   inA->factor   = FACTOR_LU;
1533   a->row        = row;
1534   a->col        = col;
1535 
1536   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1537 
1538   if (!a->diag) {
1539     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1540   }
1541   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1542   return 0;
1543 }
1544 
1545 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1546 {
1547   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1548   int         one = 1, totalnz = a->bs2*a->nz;
1549   BLscal_( &totalnz, alpha, a->a, &one );
1550   PLogFlops(totalnz);
1551   return 0;
1552 }
1553 
1554 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1555 {
1556   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1557   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1558   int        *ai = a->i, *ailen = a->ilen;
1559   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1560   Scalar     *ap, *aa = a->a, zero = 0.0;
1561 
1562   for ( k=0; k<m; k++ ) { /* loop over rows */
1563     row  = im[k]; brow = row/bs;
1564     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
1565     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1566     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1567     nrow = ailen[brow];
1568     for ( l=0; l<n; l++ ) { /* loop over columns */
1569       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1570       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1571       col  = in[l] ;
1572       bcol = col/bs;
1573       cidx = col%bs;
1574       ridx = row%bs;
1575       high = nrow;
1576       low  = 0; /* assume unsorted */
1577       while (high-low > 5) {
1578         t = (low+high)/2;
1579         if (rp[t] > bcol) high = t;
1580         else             low  = t;
1581       }
1582       for ( i=low; i<high; i++ ) {
1583         if (rp[i] > bcol) break;
1584         if (rp[i] == bcol) {
1585           *v++ = ap[bs2*i+bs*cidx+ridx];
1586           goto finished;
1587         }
1588       }
1589       *v++ = zero;
1590       finished:;
1591     }
1592   }
1593   return 0;
1594 }
1595 
1596 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1597 {
1598   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1599   *bs = baij->bs;
1600   return 0;
1601 }
1602 
1603 /* idx should be of length atlease bs */
1604 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1605 {
1606   int i,row;
1607   row = idx[0];
1608   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1609 
1610   for ( i=1; i<bs; i++ ) {
1611     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1612   }
1613   *flg = PETSC_TRUE;
1614   return 0;
1615 }
1616 
1617 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1618 {
1619   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1620   IS          is_local;
1621   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1622   PetscTruth  flg;
1623   Scalar      zero = 0.0,*aa;
1624 
1625   /* Make a copy of the IS and  sort it */
1626   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1627   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1628   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1629   ierr = ISSort(is_local); CHKERRQ(ierr);
1630   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1631 
1632   i = 0;
1633   while (i < is_n) {
1634     if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range");
1635     flg = PETSC_FALSE;
1636     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1637     if (flg) { /* There exists a block of rows to be Zerowed */
1638       baij->ilen[rows[i]/bs] = 0;
1639       i += bs;
1640     } else { /* Zero out only the requested row */
1641       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1642       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1643       for ( j=0; j<count; j++ ) {
1644         aa[0] = zero;
1645         aa+=bs;
1646       }
1647       i++;
1648     }
1649   }
1650   if (diag) {
1651     for ( j=0; j<is_n; j++ ) {
1652       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1653     }
1654   }
1655   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1656   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1657   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1658   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1659   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1660 
1661   return 0;
1662 }
1663 int MatPrintHelp_SeqBAIJ(Mat A)
1664 {
1665   static int called = 0;
1666   MPI_Comm   comm = A->comm;
1667 
1668   if (called) return 0; else called = 1;
1669   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1670   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1671   return 0;
1672 }
1673 
1674 /* -------------------------------------------------------------------*/
1675 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1676        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1677        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1678        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1679        MatSolve_SeqBAIJ_N,0,
1680        0,0,
1681        MatLUFactor_SeqBAIJ,0,
1682        0,
1683        MatTranspose_SeqBAIJ,
1684        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1685        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1686        0,MatAssemblyEnd_SeqBAIJ,
1687        0,
1688        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
1689        MatGetReordering_SeqBAIJ,
1690        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1691        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1692        MatILUFactorSymbolic_SeqBAIJ,0,
1693        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1694        MatConvertSameType_SeqBAIJ,0,0,
1695        MatILUFactor_SeqBAIJ,0,0,
1696        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
1697        MatGetValues_SeqBAIJ,0,
1698        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1699        0,0,0,MatGetBlockSize_SeqBAIJ};
1700 
1701 /*@C
1702    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1703    compressed row) format.  For good matrix assembly performance the
1704    user should preallocate the matrix storage by setting the parameter nz
1705    (or the array nzz).  By setting these parameters accurately, performance
1706    during matrix assembly can be increased by more than a factor of 50.
1707 
1708    Input Parameters:
1709 .  comm - MPI communicator, set to MPI_COMM_SELF
1710 .  bs - size of block
1711 .  m - number of rows
1712 .  n - number of columns
1713 .  nz - number of block nonzeros per block row (same for all rows)
1714 .  nzz - array containing the number of block nonzeros in the various block rows
1715          (possibly different for each block row) or PETSC_NULL
1716 
1717    Output Parameter:
1718 .  A - the matrix
1719 
1720    Notes:
1721    The block AIJ format is fully compatible with standard Fortran 77
1722    storage.  That is, the stored row and column indices can begin at
1723    either one (as in Fortran) or zero.  See the users' manual for details.
1724 
1725    Specify the preallocated storage with either nz or nnz (not both).
1726    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1727    allocation.  For additional details, see the users manual chapter on
1728    matrices and the file $(PETSC_DIR)/Performance.
1729 
1730 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1731 @*/
1732 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1733 {
1734   Mat         B;
1735   Mat_SeqBAIJ *b;
1736   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1737 
1738   if (mbs*bs!=m || nbs*bs!=n)
1739     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1740 
1741   *A = 0;
1742   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1743   PLogObjectCreate(B);
1744   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1745   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1746   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1747   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1748   if (!flg) {
1749     switch (bs) {
1750       case 1:
1751         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1752         B->ops.solve           = MatSolve_SeqBAIJ_1;
1753         B->ops.mult            = MatMult_SeqBAIJ_1;
1754         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
1755        break;
1756       case 2:
1757         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1758         B->ops.solve           = MatSolve_SeqBAIJ_2;
1759         B->ops.mult            = MatMult_SeqBAIJ_2;
1760         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
1761         break;
1762       case 3:
1763         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1764         B->ops.solve           = MatSolve_SeqBAIJ_3;
1765         B->ops.mult            = MatMult_SeqBAIJ_3;
1766         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
1767         break;
1768       case 4:
1769         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1770         B->ops.solve           = MatSolve_SeqBAIJ_4;
1771         B->ops.mult            = MatMult_SeqBAIJ_4;
1772         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
1773         break;
1774       case 5:
1775         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1776         B->ops.solve           = MatSolve_SeqBAIJ_5;
1777         B->ops.mult            = MatMult_SeqBAIJ_5;
1778         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
1779         break;
1780     }
1781   }
1782   B->destroy          = MatDestroy_SeqBAIJ;
1783   B->view             = MatView_SeqBAIJ;
1784   B->factor           = 0;
1785   B->lupivotthreshold = 1.0;
1786   b->row              = 0;
1787   b->col              = 0;
1788   b->reallocs         = 0;
1789 
1790   b->m       = m; B->m = m; B->M = m;
1791   b->n       = n; B->n = n; B->N = n;
1792   b->mbs     = mbs;
1793   b->nbs     = nbs;
1794   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1795   if (nnz == PETSC_NULL) {
1796     if (nz == PETSC_DEFAULT) nz = 5;
1797     else if (nz <= 0)        nz = 1;
1798     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1799     nz = nz*mbs;
1800   }
1801   else {
1802     nz = 0;
1803     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1804   }
1805 
1806   /* allocate the matrix space */
1807   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1808   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1809   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1810   b->j  = (int *) (b->a + nz*bs2);
1811   PetscMemzero(b->j,nz*sizeof(int));
1812   b->i  = b->j + nz;
1813   b->singlemalloc = 1;
1814 
1815   b->i[0] = 0;
1816   for (i=1; i<mbs+1; i++) {
1817     b->i[i] = b->i[i-1] + b->imax[i-1];
1818   }
1819 
1820   /* b->ilen will count nonzeros in each block row so far. */
1821   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1822   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1823   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1824 
1825   b->bs               = bs;
1826   b->bs2              = bs2;
1827   b->mbs              = mbs;
1828   b->nz               = 0;
1829   b->maxnz            = nz*bs2;
1830   b->sorted           = 0;
1831   b->roworiented      = 1;
1832   b->nonew            = 0;
1833   b->diag             = 0;
1834   b->solve_work       = 0;
1835   b->mult_work        = 0;
1836   b->spptr            = 0;
1837   B->info.nz_unneeded = (double)b->maxnz;
1838 
1839   *A = B;
1840   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1841   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1842   return 0;
1843 }
1844 
1845 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1846 {
1847   Mat         C;
1848   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1849   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1850 
1851   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1852 
1853   *B = 0;
1854   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1855   PLogObjectCreate(C);
1856   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1857   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1858   C->destroy    = MatDestroy_SeqBAIJ;
1859   C->view       = MatView_SeqBAIJ;
1860   C->factor     = A->factor;
1861   c->row        = 0;
1862   c->col        = 0;
1863   C->assembled  = PETSC_TRUE;
1864 
1865   c->m = C->m   = a->m;
1866   c->n = C->n   = a->n;
1867   C->M          = a->m;
1868   C->N          = a->n;
1869 
1870   c->bs         = a->bs;
1871   c->bs2        = a->bs2;
1872   c->mbs        = a->mbs;
1873   c->nbs        = a->nbs;
1874 
1875   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1876   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1877   for ( i=0; i<mbs; i++ ) {
1878     c->imax[i] = a->imax[i];
1879     c->ilen[i] = a->ilen[i];
1880   }
1881 
1882   /* allocate the matrix space */
1883   c->singlemalloc = 1;
1884   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1885   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1886   c->j  = (int *) (c->a + nz*bs2);
1887   c->i  = c->j + nz;
1888   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1889   if (mbs > 0) {
1890     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1891     if (cpvalues == COPY_VALUES) {
1892       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1893     }
1894   }
1895 
1896   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1897   c->sorted      = a->sorted;
1898   c->roworiented = a->roworiented;
1899   c->nonew       = a->nonew;
1900 
1901   if (a->diag) {
1902     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1903     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1904     for ( i=0; i<mbs; i++ ) {
1905       c->diag[i] = a->diag[i];
1906     }
1907   }
1908   else c->diag          = 0;
1909   c->nz                 = a->nz;
1910   c->maxnz              = a->maxnz;
1911   c->solve_work         = 0;
1912   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1913   c->mult_work          = 0;
1914   *B = C;
1915   return 0;
1916 }
1917 
1918 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1919 {
1920   Mat_SeqBAIJ  *a;
1921   Mat          B;
1922   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1923   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1924   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1925   int          *masked, nmask,tmp,bs2,ishift;
1926   Scalar       *aa;
1927   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1928 
1929   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1930   bs2  = bs*bs;
1931 
1932   MPI_Comm_size(comm,&size);
1933   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1934   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1935   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1936   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1937   M = header[1]; N = header[2]; nz = header[3];
1938 
1939   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1940 
1941   /*
1942      This code adds extra rows to make sure the number of rows is
1943     divisible by the blocksize
1944   */
1945   mbs        = M/bs;
1946   extra_rows = bs - M + bs*(mbs);
1947   if (extra_rows == bs) extra_rows = 0;
1948   else                  mbs++;
1949   if (extra_rows) {
1950     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1951   }
1952 
1953   /* read in row lengths */
1954   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1955   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1956   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1957 
1958   /* read in column indices */
1959   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1960   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1961   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1962 
1963   /* loop over row lengths determining block row lengths */
1964   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1965   PetscMemzero(browlengths,mbs*sizeof(int));
1966   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1967   PetscMemzero(mask,mbs*sizeof(int));
1968   masked = mask + mbs;
1969   rowcount = 0; nzcount = 0;
1970   for ( i=0; i<mbs; i++ ) {
1971     nmask = 0;
1972     for ( j=0; j<bs; j++ ) {
1973       kmax = rowlengths[rowcount];
1974       for ( k=0; k<kmax; k++ ) {
1975         tmp = jj[nzcount++]/bs;
1976         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1977       }
1978       rowcount++;
1979     }
1980     browlengths[i] += nmask;
1981     /* zero out the mask elements we set */
1982     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1983   }
1984 
1985   /* create our matrix */
1986   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1987          CHKERRQ(ierr);
1988   B = *A;
1989   a = (Mat_SeqBAIJ *) B->data;
1990 
1991   /* set matrix "i" values */
1992   a->i[0] = 0;
1993   for ( i=1; i<= mbs; i++ ) {
1994     a->i[i]      = a->i[i-1] + browlengths[i-1];
1995     a->ilen[i-1] = browlengths[i-1];
1996   }
1997   a->nz         = 0;
1998   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1999 
2000   /* read in nonzero values */
2001   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
2002   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
2003   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2004 
2005   /* set "a" and "j" values into matrix */
2006   nzcount = 0; jcount = 0;
2007   for ( i=0; i<mbs; i++ ) {
2008     nzcountb = nzcount;
2009     nmask    = 0;
2010     for ( j=0; j<bs; j++ ) {
2011       kmax = rowlengths[i*bs+j];
2012       for ( k=0; k<kmax; k++ ) {
2013         tmp = jj[nzcount++]/bs;
2014 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2015       }
2016       rowcount++;
2017     }
2018     /* sort the masked values */
2019     PetscSortInt(nmask,masked);
2020 
2021     /* set "j" values into matrix */
2022     maskcount = 1;
2023     for ( j=0; j<nmask; j++ ) {
2024       a->j[jcount++]  = masked[j];
2025       mask[masked[j]] = maskcount++;
2026     }
2027     /* set "a" values into matrix */
2028     ishift = bs2*a->i[i];
2029     for ( j=0; j<bs; j++ ) {
2030       kmax = rowlengths[i*bs+j];
2031       for ( k=0; k<kmax; k++ ) {
2032         tmp    = jj[nzcountb]/bs ;
2033         block  = mask[tmp] - 1;
2034         point  = jj[nzcountb] - bs*tmp;
2035         idx    = ishift + bs2*block + j + bs*point;
2036         a->a[idx] = aa[nzcountb++];
2037       }
2038     }
2039     /* zero out the mask elements we set */
2040     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2041   }
2042   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
2043 
2044   PetscFree(rowlengths);
2045   PetscFree(browlengths);
2046   PetscFree(aa);
2047   PetscFree(jj);
2048   PetscFree(mask);
2049 
2050   B->assembled = PETSC_TRUE;
2051 
2052   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2053   if (flg) {
2054     Viewer tviewer;
2055     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2056     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
2057     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2058     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2059   }
2060   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2061   if (flg) {
2062     Viewer tviewer;
2063     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2064     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
2065     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2066     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2067   }
2068   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2069   if (flg) {
2070     Viewer tviewer;
2071     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2072     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2073     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2074   }
2075   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2076   if (flg) {
2077     Viewer tviewer;
2078     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2079     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
2080     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2081     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2082   }
2083   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2084   if (flg) {
2085     Viewer tviewer;
2086     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
2087     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
2088     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
2089     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2090   }
2091   return 0;
2092 }
2093 
2094 
2095 
2096