xref: /petsc/src/mat/impls/baij/seq/baij.c (revision fb10cecf1927e102c85c17a39b04f58da85ca3ce)
1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "petscsys.h"                     /*I "petscmat.h" I*/
11 
12 /*
13     Special version for Fun3d sequential benchmark
14 */
15 #if defined(PETSC_HAVE_FORTRAN_CAPS)
16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
18 #define matsetvaluesblocked4_ matsetvaluesblocked4
19 #endif
20 
21 EXTERN_C_BEGIN
22 #undef __FUNCT__
23 #define __FUNCT__ "matsetvaluesblocked4_"
24 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[])
25 {
26   Mat               A = *AA;
27   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
28   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
29   int               *ai=a->i,*ailen=a->ilen;
30   int               *aj=a->j,stepval;
31   const PetscScalar *value = v;
32   MatScalar         *ap,*aa = a->a,*bap;
33 
34   PetscFunctionBegin;
35   stepval = (n-1)*4;
36   for (k=0; k<m; k++) { /* loop over added rows */
37     row  = im[k];
38     rp   = aj + ai[row];
39     ap   = aa + 16*ai[row];
40     nrow = ailen[row];
41     low  = 0;
42     for (l=0; l<n; l++) { /* loop over added columns */
43       col = in[l];
44       value = v + k*(stepval+4)*4 + l*4;
45       low = 0; high = nrow;
46       while (high-low > 7) {
47         t = (low+high)/2;
48         if (rp[t] > col) high = t;
49         else             low  = t;
50       }
51       for (i=low; i<high; i++) {
52         if (rp[i] > col) break;
53         if (rp[i] == col) {
54           bap  = ap +  16*i;
55           for (ii=0; ii<4; ii++,value+=stepval) {
56             for (jj=ii; jj<16; jj+=4) {
57               bap[jj] += *value++;
58             }
59           }
60           goto noinsert2;
61         }
62       }
63       N = nrow++ - 1;
64       /* shift up all the later entries in this row */
65       for (ii=N; ii>=i; ii--) {
66         rp[ii+1] = rp[ii];
67         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
68       }
69       if (N >= i) {
70         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
71       }
72       rp[i] = col;
73       bap   = ap +  16*i;
74       for (ii=0; ii<4; ii++,value+=stepval) {
75         for (jj=ii; jj<16; jj+=4) {
76           bap[jj] = *value++;
77         }
78       }
79       noinsert2:;
80       low = i;
81     }
82     ailen[row] = nrow;
83   }
84 }
85 EXTERN_C_END
86 
87 #if defined(PETSC_HAVE_FORTRAN_CAPS)
88 #define matsetvalues4_ MATSETVALUES4
89 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
90 #define matsetvalues4_ matsetvalues4
91 #endif
92 
93 EXTERN_C_BEGIN
94 #undef __FUNCT__
95 #define __FUNCT__ "MatSetValues4_"
96 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
97 {
98   Mat         A = *AA;
99   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
100   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
101   int         *ai=a->i,*ailen=a->ilen;
102   int         *aj=a->j,brow,bcol;
103   int         ridx,cidx;
104   MatScalar   *ap,value,*aa=a->a,*bap;
105 
106   PetscFunctionBegin;
107   for (k=0; k<m; k++) { /* loop over added rows */
108     row  = im[k]; brow = row/4;
109     rp   = aj + ai[brow];
110     ap   = aa + 16*ai[brow];
111     nrow = ailen[brow];
112     low  = 0;
113     for (l=0; l<n; l++) { /* loop over added columns */
114       col = in[l]; bcol = col/4;
115       ridx = row % 4; cidx = col % 4;
116       value = v[l + k*n];
117       low = 0; high = nrow;
118       while (high-low > 7) {
119         t = (low+high)/2;
120         if (rp[t] > bcol) high = t;
121         else              low  = t;
122       }
123       for (i=low; i<high; i++) {
124         if (rp[i] > bcol) break;
125         if (rp[i] == bcol) {
126           bap  = ap +  16*i + 4*cidx + ridx;
127           *bap += value;
128           goto noinsert1;
129         }
130       }
131       N = nrow++ - 1;
132       /* shift up all the later entries in this row */
133       for (ii=N; ii>=i; ii--) {
134         rp[ii+1] = rp[ii];
135         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
136       }
137       if (N>=i) {
138         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
139       }
140       rp[i]                    = bcol;
141       ap[16*i + 4*cidx + ridx] = value;
142       noinsert1:;
143       low = i;
144     }
145     ailen[brow] = nrow;
146   }
147 }
148 EXTERN_C_END
149 
150 /*  UGLY, ugly, ugly
151    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
152    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
153    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
154    converts the entries into single precision and then calls ..._MatScalar() to put them
155    into the single precision data structures.
156 */
157 #if defined(PETSC_USE_MAT_SINGLE)
158 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
159 #else
160 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
161 #endif
162 
163 #define CHUNKSIZE  10
164 
165 /*
166      Checks for missing diagonals
167 */
168 #undef __FUNCT__
169 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
170 int MatMissingDiagonal_SeqBAIJ(Mat A)
171 {
172   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
173   int         *diag,*jj = a->j,i,ierr;
174 
175   PetscFunctionBegin;
176   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
177   diag = a->diag;
178   for (i=0; i<a->mbs; i++) {
179     if (jj[diag[i]] != i) {
180       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
181     }
182   }
183   PetscFunctionReturn(0);
184 }
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
188 int MatMarkDiagonal_SeqBAIJ(Mat A)
189 {
190   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
191   int         i,j,*diag,m = a->mbs,ierr;
192 
193   PetscFunctionBegin;
194   if (a->diag) PetscFunctionReturn(0);
195 
196   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
197   PetscLogObjectMemory(A,(m+1)*sizeof(int));
198   for (i=0; i<m; i++) {
199     diag[i] = a->i[i+1];
200     for (j=a->i[i]; j<a->i[i+1]; j++) {
201       if (a->j[j] == i) {
202         diag[i] = j;
203         break;
204       }
205     }
206   }
207   a->diag = diag;
208   PetscFunctionReturn(0);
209 }
210 
211 
212 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
216 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
217 {
218   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
219   int         ierr,n = a->mbs,i;
220 
221   PetscFunctionBegin;
222   *nn = n;
223   if (!ia) PetscFunctionReturn(0);
224   if (symmetric) {
225     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
226   } else if (oshift == 1) {
227     /* temporarily add 1 to i and j indices */
228     int nz = a->i[n];
229     for (i=0; i<nz; i++) a->j[i]++;
230     for (i=0; i<n+1; i++) a->i[i]++;
231     *ia = a->i; *ja = a->j;
232   } else {
233     *ia = a->i; *ja = a->j;
234   }
235 
236   PetscFunctionReturn(0);
237 }
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
241 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
242 {
243   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
244   int         i,n = a->mbs,ierr;
245 
246   PetscFunctionBegin;
247   if (!ia) PetscFunctionReturn(0);
248   if (symmetric) {
249     ierr = PetscFree(*ia);CHKERRQ(ierr);
250     ierr = PetscFree(*ja);CHKERRQ(ierr);
251   } else if (oshift == 1) {
252     int nz = a->i[n]-1;
253     for (i=0; i<nz; i++) a->j[i]--;
254     for (i=0; i<n+1; i++) a->i[i]--;
255   }
256   PetscFunctionReturn(0);
257 }
258 
259 #undef __FUNCT__
260 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
261 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
262 {
263   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
264 
265   PetscFunctionBegin;
266   *bs = baij->bs;
267   PetscFunctionReturn(0);
268 }
269 
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatDestroy_SeqBAIJ"
273 int MatDestroy_SeqBAIJ(Mat A)
274 {
275   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
276   int         ierr;
277 
278   PetscFunctionBegin;
279 #if defined(PETSC_USE_LOG)
280   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
281 #endif
282   ierr = PetscFree(a->a);CHKERRQ(ierr);
283   if (!a->singlemalloc) {
284     ierr = PetscFree(a->i);CHKERRQ(ierr);
285     ierr = PetscFree(a->j);CHKERRQ(ierr);
286   }
287   if (a->row) {
288     ierr = ISDestroy(a->row);CHKERRQ(ierr);
289   }
290   if (a->col) {
291     ierr = ISDestroy(a->col);CHKERRQ(ierr);
292   }
293   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
294   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
295   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
296   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
297   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
298   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
299   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
300 #if defined(PETSC_USE_MAT_SINGLE)
301   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
302 #endif
303   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
304 
305   ierr = PetscFree(a);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatSetOption_SeqBAIJ"
311 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
312 {
313   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
314 
315   PetscFunctionBegin;
316   switch (op) {
317   case MAT_ROW_ORIENTED:
318     a->roworiented    = PETSC_TRUE;
319     break;
320   case MAT_COLUMN_ORIENTED:
321     a->roworiented    = PETSC_FALSE;
322     break;
323   case MAT_COLUMNS_SORTED:
324     a->sorted         = PETSC_TRUE;
325     break;
326   case MAT_COLUMNS_UNSORTED:
327     a->sorted         = PETSC_FALSE;
328     break;
329   case MAT_KEEP_ZEROED_ROWS:
330     a->keepzeroedrows = PETSC_TRUE;
331     break;
332   case MAT_NO_NEW_NONZERO_LOCATIONS:
333     a->nonew          = 1;
334     break;
335   case MAT_NEW_NONZERO_LOCATION_ERR:
336     a->nonew          = -1;
337     break;
338   case MAT_NEW_NONZERO_ALLOCATION_ERR:
339     a->nonew          = -2;
340     break;
341   case MAT_YES_NEW_NONZERO_LOCATIONS:
342     a->nonew          = 0;
343     break;
344   case MAT_ROWS_SORTED:
345   case MAT_ROWS_UNSORTED:
346   case MAT_YES_NEW_DIAGONALS:
347   case MAT_IGNORE_OFF_PROC_ENTRIES:
348   case MAT_USE_HASH_TABLE:
349     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
350     break;
351   case MAT_NO_NEW_DIAGONALS:
352     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
353   case MAT_SYMMETRIC:
354   case MAT_STRUCTURALLY_SYMMETRIC:
355     break;
356   default:
357     SETERRQ(PETSC_ERR_SUP,"unknown option");
358   }
359   PetscFunctionReturn(0);
360 }
361 
362 #undef __FUNCT__
363 #define __FUNCT__ "MatGetRow_SeqBAIJ"
364 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
365 {
366   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
367   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
368   MatScalar    *aa,*aa_i;
369   PetscScalar  *v_i;
370 
371   PetscFunctionBegin;
372   bs  = a->bs;
373   ai  = a->i;
374   aj  = a->j;
375   aa  = a->a;
376   bs2 = a->bs2;
377 
378   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row);
379 
380   bn  = row/bs;   /* Block number */
381   bp  = row % bs; /* Block Position */
382   M   = ai[bn+1] - ai[bn];
383   *nz = bs*M;
384 
385   if (v) {
386     *v = 0;
387     if (*nz) {
388       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
389       for (i=0; i<M; i++) { /* for each block in the block row */
390         v_i  = *v + i*bs;
391         aa_i = aa + bs2*(ai[bn] + i);
392         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
393       }
394     }
395   }
396 
397   if (idx) {
398     *idx = 0;
399     if (*nz) {
400       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
401       for (i=0; i<M; i++) { /* for each block in the block row */
402         idx_i = *idx + i*bs;
403         itmp  = bs*aj[ai[bn] + i];
404         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
405       }
406     }
407   }
408   PetscFunctionReturn(0);
409 }
410 
411 #undef __FUNCT__
412 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
413 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
414 {
415   int ierr;
416 
417   PetscFunctionBegin;
418   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
419   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
420   PetscFunctionReturn(0);
421 }
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "MatTranspose_SeqBAIJ"
425 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
426 {
427   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
428   Mat         C;
429   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
430   int         *rows,*cols,bs2=a->bs2;
431   PetscScalar *array;
432 
433   PetscFunctionBegin;
434   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
435   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
436   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
437 
438 #if defined(PETSC_USE_MAT_SINGLE)
439   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
440   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
441 #else
442   array = a->a;
443 #endif
444 
445   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
446   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
447   ierr = PetscFree(col);CHKERRQ(ierr);
448   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
449   cols = rows + bs;
450   for (i=0; i<mbs; i++) {
451     cols[0] = i*bs;
452     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
453     len = ai[i+1] - ai[i];
454     for (j=0; j<len; j++) {
455       rows[0] = (*aj++)*bs;
456       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
457       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
458       array += bs2;
459     }
460   }
461   ierr = PetscFree(rows);CHKERRQ(ierr);
462 #if defined(PETSC_USE_MAT_SINGLE)
463   ierr = PetscFree(array);
464 #endif
465 
466   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
467   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
468 
469   if (B) {
470     *B = C;
471   } else {
472     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
473   }
474   PetscFunctionReturn(0);
475 }
476 
477 #undef __FUNCT__
478 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
479 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
480 {
481   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
482   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
483   PetscScalar *aa;
484   FILE        *file;
485 
486   PetscFunctionBegin;
487   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
488   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
489   col_lens[0] = MAT_FILE_COOKIE;
490 
491   col_lens[1] = A->m;
492   col_lens[2] = A->n;
493   col_lens[3] = a->nz*bs2;
494 
495   /* store lengths of each row and write (including header) to file */
496   count = 0;
497   for (i=0; i<a->mbs; i++) {
498     for (j=0; j<bs; j++) {
499       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
500     }
501   }
502   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
503   ierr = PetscFree(col_lens);CHKERRQ(ierr);
504 
505   /* store column indices (zero start index) */
506   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
507   count = 0;
508   for (i=0; i<a->mbs; i++) {
509     for (j=0; j<bs; j++) {
510       for (k=a->i[i]; k<a->i[i+1]; k++) {
511         for (l=0; l<bs; l++) {
512           jj[count++] = bs*a->j[k] + l;
513         }
514       }
515     }
516   }
517   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
518   ierr = PetscFree(jj);CHKERRQ(ierr);
519 
520   /* store nonzero values */
521   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
522   count = 0;
523   for (i=0; i<a->mbs; i++) {
524     for (j=0; j<bs; j++) {
525       for (k=a->i[i]; k<a->i[i+1]; k++) {
526         for (l=0; l<bs; l++) {
527           aa[count++] = a->a[bs2*k + l*bs + j];
528         }
529       }
530     }
531   }
532   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
533   ierr = PetscFree(aa);CHKERRQ(ierr);
534 
535   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
536   if (file) {
537     fprintf(file,"-matload_block_size %d\n",a->bs);
538   }
539   PetscFunctionReturn(0);
540 }
541 
542 #undef __FUNCT__
543 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
544 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
545 {
546   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
547   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
548   PetscViewerFormat format;
549 
550   PetscFunctionBegin;
551   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
552   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
553     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
554   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
555     Mat aij;
556     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
557     ierr = MatView(aij,viewer);CHKERRQ(ierr);
558     ierr = MatDestroy(aij);CHKERRQ(ierr);
559   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
560      PetscFunctionReturn(0);
561   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
562     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
563     for (i=0; i<a->mbs; i++) {
564       for (j=0; j<bs; j++) {
565         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
566         for (k=a->i[i]; k<a->i[i+1]; k++) {
567           for (l=0; l<bs; l++) {
568 #if defined(PETSC_USE_COMPLEX)
569             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
570               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
571                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
572             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
573               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
574                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
575             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
576               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
577             }
578 #else
579             if (a->a[bs2*k + l*bs + j] != 0.0) {
580               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
581             }
582 #endif
583           }
584         }
585         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
586       }
587     }
588     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
589   } else {
590     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
591     for (i=0; i<a->mbs; i++) {
592       for (j=0; j<bs; j++) {
593         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
594         for (k=a->i[i]; k<a->i[i+1]; k++) {
595           for (l=0; l<bs; l++) {
596 #if defined(PETSC_USE_COMPLEX)
597             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
598               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
599                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
600             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
601               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
602                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
603             } else {
604               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
605             }
606 #else
607             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
608 #endif
609           }
610         }
611         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
612       }
613     }
614     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
615   }
616   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
617   PetscFunctionReturn(0);
618 }
619 
620 #undef __FUNCT__
621 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
622 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
623 {
624   Mat          A = (Mat) Aa;
625   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
626   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
627   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
628   MatScalar    *aa;
629   PetscViewer  viewer;
630 
631   PetscFunctionBegin;
632 
633   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
634   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
635 
636   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
637 
638   /* loop over matrix elements drawing boxes */
639   color = PETSC_DRAW_BLUE;
640   for (i=0,row=0; i<mbs; i++,row+=bs) {
641     for (j=a->i[i]; j<a->i[i+1]; j++) {
642       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
643       x_l = a->j[j]*bs; x_r = x_l + 1.0;
644       aa = a->a + j*bs2;
645       for (k=0; k<bs; k++) {
646         for (l=0; l<bs; l++) {
647           if (PetscRealPart(*aa++) >=  0.) continue;
648           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
649         }
650       }
651     }
652   }
653   color = PETSC_DRAW_CYAN;
654   for (i=0,row=0; i<mbs; i++,row+=bs) {
655     for (j=a->i[i]; j<a->i[i+1]; j++) {
656       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
657       x_l = a->j[j]*bs; x_r = x_l + 1.0;
658       aa = a->a + j*bs2;
659       for (k=0; k<bs; k++) {
660         for (l=0; l<bs; l++) {
661           if (PetscRealPart(*aa++) != 0.) continue;
662           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
663         }
664       }
665     }
666   }
667 
668   color = PETSC_DRAW_RED;
669   for (i=0,row=0; i<mbs; i++,row+=bs) {
670     for (j=a->i[i]; j<a->i[i+1]; j++) {
671       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
672       x_l = a->j[j]*bs; x_r = x_l + 1.0;
673       aa = a->a + j*bs2;
674       for (k=0; k<bs; k++) {
675         for (l=0; l<bs; l++) {
676           if (PetscRealPart(*aa++) <= 0.) continue;
677           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
678         }
679       }
680     }
681   }
682   PetscFunctionReturn(0);
683 }
684 
685 #undef __FUNCT__
686 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
687 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
688 {
689   int          ierr;
690   PetscReal    xl,yl,xr,yr,w,h;
691   PetscDraw    draw;
692   PetscTruth   isnull;
693 
694   PetscFunctionBegin;
695 
696   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
697   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
698 
699   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
700   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
701   xr += w;    yr += h;  xl = -w;     yl = -h;
702   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
703   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
704   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatView_SeqBAIJ"
710 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
711 {
712   int        ierr;
713   PetscTruth issocket,isascii,isbinary,isdraw;
714 
715   PetscFunctionBegin;
716   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
717   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
718   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
719   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
720   if (issocket) {
721     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
722   } else if (isascii){
723     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
724   } else if (isbinary) {
725     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
726   } else if (isdraw) {
727     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
728   } else {
729     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
730   }
731   PetscFunctionReturn(0);
732 }
733 
734 
735 #undef __FUNCT__
736 #define __FUNCT__ "MatGetValues_SeqBAIJ"
737 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
738 {
739   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
740   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
741   int        *ai = a->i,*ailen = a->ilen;
742   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
743   MatScalar  *ap,*aa = a->a,zero = 0.0;
744 
745   PetscFunctionBegin;
746   for (k=0; k<m; k++) { /* loop over rows */
747     row  = im[k]; brow = row/bs;
748     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
749     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row);
750     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
751     nrow = ailen[brow];
752     for (l=0; l<n; l++) { /* loop over columns */
753       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
754       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]);
755       col  = in[l] ;
756       bcol = col/bs;
757       cidx = col%bs;
758       ridx = row%bs;
759       high = nrow;
760       low  = 0; /* assume unsorted */
761       while (high-low > 5) {
762         t = (low+high)/2;
763         if (rp[t] > bcol) high = t;
764         else             low  = t;
765       }
766       for (i=low; i<high; i++) {
767         if (rp[i] > bcol) break;
768         if (rp[i] == bcol) {
769           *v++ = ap[bs2*i+bs*cidx+ridx];
770           goto finished;
771         }
772       }
773       *v++ = zero;
774       finished:;
775     }
776   }
777   PetscFunctionReturn(0);
778 }
779 
780 #if defined(PETSC_USE_MAT_SINGLE)
781 #undef __FUNCT__
782 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
783 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
784 {
785   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
786   int         ierr,i,N = m*n*b->bs2;
787   MatScalar   *vsingle;
788 
789   PetscFunctionBegin;
790   if (N > b->setvalueslen) {
791     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
792     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
793     b->setvalueslen  = N;
794   }
795   vsingle = b->setvaluescopy;
796   for (i=0; i<N; i++) {
797     vsingle[i] = v[i];
798   }
799   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
800   PetscFunctionReturn(0);
801 }
802 #endif
803 
804 
805 #undef __FUNCT__
806 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
807 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is)
808 {
809   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
810   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
811   int               *imax=a->imax,*ai=a->i,*ailen=a->ilen;
812   int               *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
813   PetscTruth        roworiented=a->roworiented;
814   const MatScalar   *value = v;
815   MatScalar         *ap,*aa = a->a,*bap;
816 
817   PetscFunctionBegin;
818   if (roworiented) {
819     stepval = (n-1)*bs;
820   } else {
821     stepval = (m-1)*bs;
822   }
823   for (k=0; k<m; k++) { /* loop over added rows */
824     row  = im[k];
825     if (row < 0) continue;
826 #if defined(PETSC_USE_BOPT_g)
827     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
828 #endif
829     rp   = aj + ai[row];
830     ap   = aa + bs2*ai[row];
831     rmax = imax[row];
832     nrow = ailen[row];
833     low  = 0;
834     for (l=0; l<n; l++) { /* loop over added columns */
835       if (in[l] < 0) continue;
836 #if defined(PETSC_USE_BOPT_g)
837       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1);
838 #endif
839       col = in[l];
840       if (roworiented) {
841         value = v + k*(stepval+bs)*bs + l*bs;
842       } else {
843         value = v + l*(stepval+bs)*bs + k*bs;
844       }
845       if (!sorted) low = 0; high = nrow;
846       while (high-low > 7) {
847         t = (low+high)/2;
848         if (rp[t] > col) high = t;
849         else             low  = t;
850       }
851       for (i=low; i<high; i++) {
852         if (rp[i] > col) break;
853         if (rp[i] == col) {
854           bap  = ap +  bs2*i;
855           if (roworiented) {
856             if (is == ADD_VALUES) {
857               for (ii=0; ii<bs; ii++,value+=stepval) {
858                 for (jj=ii; jj<bs2; jj+=bs) {
859                   bap[jj] += *value++;
860                 }
861               }
862             } else {
863               for (ii=0; ii<bs; ii++,value+=stepval) {
864                 for (jj=ii; jj<bs2; jj+=bs) {
865                   bap[jj] = *value++;
866                 }
867               }
868             }
869           } else {
870             if (is == ADD_VALUES) {
871               for (ii=0; ii<bs; ii++,value+=stepval) {
872                 for (jj=0; jj<bs; jj++) {
873                   *bap++ += *value++;
874                 }
875               }
876             } else {
877               for (ii=0; ii<bs; ii++,value+=stepval) {
878                 for (jj=0; jj<bs; jj++) {
879                   *bap++  = *value++;
880                 }
881               }
882             }
883           }
884           goto noinsert2;
885         }
886       }
887       if (nonew == 1) goto noinsert2;
888       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
889       if (nrow >= rmax) {
890         /* there is no extra room in row, therefore enlarge */
891         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
892         MatScalar *new_a;
893 
894         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
895 
896         /* malloc new storage space */
897         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
898 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
899         new_j   = (int*)(new_a + bs2*new_nz);
900         new_i   = new_j + new_nz;
901 
902         /* copy over old data into new slots */
903         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
904         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
905         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
906         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
907         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
908         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
909         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
910         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
911         /* free up old matrix storage */
912         ierr = PetscFree(a->a);CHKERRQ(ierr);
913         if (!a->singlemalloc) {
914           ierr = PetscFree(a->i);CHKERRQ(ierr);
915           ierr = PetscFree(a->j);CHKERRQ(ierr);
916         }
917         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
918         a->singlemalloc = PETSC_TRUE;
919 
920         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
921         rmax = imax[row] = imax[row] + CHUNKSIZE;
922         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
923         a->maxnz += bs2*CHUNKSIZE;
924         a->reallocs++;
925         a->nz++;
926       }
927       N = nrow++ - 1;
928       /* shift up all the later entries in this row */
929       for (ii=N; ii>=i; ii--) {
930         rp[ii+1] = rp[ii];
931         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
932       }
933       if (N >= i) {
934         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
935       }
936       rp[i] = col;
937       bap   = ap +  bs2*i;
938       if (roworiented) {
939         for (ii=0; ii<bs; ii++,value+=stepval) {
940           for (jj=ii; jj<bs2; jj+=bs) {
941             bap[jj] = *value++;
942           }
943         }
944       } else {
945         for (ii=0; ii<bs; ii++,value+=stepval) {
946           for (jj=0; jj<bs; jj++) {
947             *bap++  = *value++;
948           }
949         }
950       }
951       noinsert2:;
952       low = i;
953     }
954     ailen[row] = nrow;
955   }
956   PetscFunctionReturn(0);
957 }
958 
959 #undef __FUNCT__
960 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
961 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
962 {
963   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
964   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
965   int        m = A->m,*ip,N,*ailen = a->ilen;
966   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
967   MatScalar  *aa = a->a,*ap;
968 
969   PetscFunctionBegin;
970   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
971 
972   if (m) rmax = ailen[0];
973   for (i=1; i<mbs; i++) {
974     /* move each row back by the amount of empty slots (fshift) before it*/
975     fshift += imax[i-1] - ailen[i-1];
976     rmax   = PetscMax(rmax,ailen[i]);
977     if (fshift) {
978       ip = aj + ai[i]; ap = aa + bs2*ai[i];
979       N = ailen[i];
980       for (j=0; j<N; j++) {
981         ip[j-fshift] = ip[j];
982         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
983       }
984     }
985     ai[i] = ai[i-1] + ailen[i-1];
986   }
987   if (mbs) {
988     fshift += imax[mbs-1] - ailen[mbs-1];
989     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
990   }
991   /* reset ilen and imax for each row */
992   for (i=0; i<mbs; i++) {
993     ailen[i] = imax[i] = ai[i+1] - ai[i];
994   }
995   a->nz = ai[mbs];
996 
997   /* diagonals may have moved, so kill the diagonal pointers */
998   if (fshift && a->diag) {
999     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1000     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
1001     a->diag = 0;
1002   }
1003   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1004   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1005   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1006   a->reallocs          = 0;
1007   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1008 
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 
1013 
1014 /*
1015    This function returns an array of flags which indicate the locations of contiguous
1016    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1017    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1018    Assume: sizes should be long enough to hold all the values.
1019 */
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1022 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1023 {
1024   int        i,j,k,row;
1025   PetscTruth flg;
1026 
1027   PetscFunctionBegin;
1028   for (i=0,j=0; i<n; j++) {
1029     row = idx[i];
1030     if (row%bs!=0) { /* Not the begining of a block */
1031       sizes[j] = 1;
1032       i++;
1033     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1034       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1035       i++;
1036     } else { /* Begining of the block, so check if the complete block exists */
1037       flg = PETSC_TRUE;
1038       for (k=1; k<bs; k++) {
1039         if (row+k != idx[i+k]) { /* break in the block */
1040           flg = PETSC_FALSE;
1041           break;
1042         }
1043       }
1044       if (flg == PETSC_TRUE) { /* No break in the bs */
1045         sizes[j] = bs;
1046         i+= bs;
1047       } else {
1048         sizes[j] = 1;
1049         i++;
1050       }
1051     }
1052   }
1053   *bs_max = j;
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 #undef __FUNCT__
1058 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1059 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1060 {
1061   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1062   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
1063   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1064   PetscScalar zero = 0.0;
1065   MatScalar   *aa;
1066 
1067   PetscFunctionBegin;
1068   /* Make a copy of the IS and  sort it */
1069   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1070   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1071 
1072   /* allocate memory for rows,sizes */
1073   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
1074   sizes = rows + is_n;
1075 
1076   /* copy IS values to rows, and sort them */
1077   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1078   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1079   if (baij->keepzeroedrows) {
1080     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1081     bs_max = is_n;
1082   } else {
1083     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1084   }
1085   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1086 
1087   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1088     row   = rows[j];
1089     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1090     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1091     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1092     if (sizes[i] == bs && !baij->keepzeroedrows) {
1093       if (diag) {
1094         if (baij->ilen[row/bs] > 0) {
1095           baij->ilen[row/bs]       = 1;
1096           baij->j[baij->i[row/bs]] = row/bs;
1097           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1098         }
1099         /* Now insert all the diagonal values for this bs */
1100         for (k=0; k<bs; k++) {
1101           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1102         }
1103       } else { /* (!diag) */
1104         baij->ilen[row/bs] = 0;
1105       } /* end (!diag) */
1106     } else { /* (sizes[i] != bs) */
1107 #if defined (PETSC_USE_DEBUG)
1108       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1109 #endif
1110       for (k=0; k<count; k++) {
1111         aa[0] =  zero;
1112         aa    += bs;
1113       }
1114       if (diag) {
1115         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1116       }
1117     }
1118   }
1119 
1120   ierr = PetscFree(rows);CHKERRQ(ierr);
1121   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 #undef __FUNCT__
1126 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1127 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
1128 {
1129   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1130   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1131   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1132   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1133   int         ridx,cidx,bs2=a->bs2,ierr;
1134   PetscTruth  roworiented=a->roworiented;
1135   MatScalar   *ap,value,*aa=a->a,*bap;
1136 
1137   PetscFunctionBegin;
1138   for (k=0; k<m; k++) { /* loop over added rows */
1139     row  = im[k]; brow = row/bs;
1140     if (row < 0) continue;
1141 #if defined(PETSC_USE_BOPT_g)
1142     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
1143 #endif
1144     rp   = aj + ai[brow];
1145     ap   = aa + bs2*ai[brow];
1146     rmax = imax[brow];
1147     nrow = ailen[brow];
1148     low  = 0;
1149     for (l=0; l<n; l++) { /* loop over added columns */
1150       if (in[l] < 0) continue;
1151 #if defined(PETSC_USE_BOPT_g)
1152       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
1153 #endif
1154       col = in[l]; bcol = col/bs;
1155       ridx = row % bs; cidx = col % bs;
1156       if (roworiented) {
1157         value = v[l + k*n];
1158       } else {
1159         value = v[k + l*m];
1160       }
1161       if (!sorted) low = 0; high = nrow;
1162       while (high-low > 7) {
1163         t = (low+high)/2;
1164         if (rp[t] > bcol) high = t;
1165         else              low  = t;
1166       }
1167       for (i=low; i<high; i++) {
1168         if (rp[i] > bcol) break;
1169         if (rp[i] == bcol) {
1170           bap  = ap +  bs2*i + bs*cidx + ridx;
1171           if (is == ADD_VALUES) *bap += value;
1172           else                  *bap  = value;
1173           goto noinsert1;
1174         }
1175       }
1176       if (nonew == 1) goto noinsert1;
1177       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1178       if (nrow >= rmax) {
1179         /* there is no extra room in row, therefore enlarge */
1180         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1181         MatScalar *new_a;
1182 
1183         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1184 
1185         /* Malloc new storage space */
1186         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1187 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1188         new_j   = (int*)(new_a + bs2*new_nz);
1189         new_i   = new_j + new_nz;
1190 
1191         /* copy over old data into new slots */
1192         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1193         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1194         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1195         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1196         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1197         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1198         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1199         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1200         /* free up old matrix storage */
1201         ierr = PetscFree(a->a);CHKERRQ(ierr);
1202         if (!a->singlemalloc) {
1203           ierr = PetscFree(a->i);CHKERRQ(ierr);
1204           ierr = PetscFree(a->j);CHKERRQ(ierr);
1205         }
1206         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1207         a->singlemalloc = PETSC_TRUE;
1208 
1209         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1210         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1211         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1212         a->maxnz += bs2*CHUNKSIZE;
1213         a->reallocs++;
1214         a->nz++;
1215       }
1216       N = nrow++ - 1;
1217       /* shift up all the later entries in this row */
1218       for (ii=N; ii>=i; ii--) {
1219         rp[ii+1] = rp[ii];
1220         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1221       }
1222       if (N>=i) {
1223         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1224       }
1225       rp[i]                      = bcol;
1226       ap[bs2*i + bs*cidx + ridx] = value;
1227       noinsert1:;
1228       low = i;
1229     }
1230     ailen[brow] = nrow;
1231   }
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1238 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1239 {
1240   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1241   Mat         outA;
1242   int         ierr;
1243   PetscTruth  row_identity,col_identity;
1244 
1245   PetscFunctionBegin;
1246   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1247   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1248   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1249   if (!row_identity || !col_identity) {
1250     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1251   }
1252 
1253   outA          = inA;
1254   inA->factor   = FACTOR_LU;
1255 
1256   if (!a->diag) {
1257     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1258   }
1259 
1260   a->row        = row;
1261   a->col        = col;
1262   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1263   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1264 
1265   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1266   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1267   PetscLogObjectParent(inA,a->icol);
1268 
1269   /*
1270       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1271       for ILU(0) factorization with natural ordering
1272   */
1273   if (a->bs < 8) {
1274     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1275   } else {
1276     if (!a->solve_work) {
1277       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1278       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1279     }
1280   }
1281 
1282   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1283 
1284   PetscFunctionReturn(0);
1285 }
1286 #undef __FUNCT__
1287 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1288 int MatPrintHelp_SeqBAIJ(Mat A)
1289 {
1290   static PetscTruth called = PETSC_FALSE;
1291   MPI_Comm          comm = A->comm;
1292   int               ierr;
1293 
1294   PetscFunctionBegin;
1295   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1296   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1297   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 EXTERN_C_BEGIN
1302 #undef __FUNCT__
1303 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1304 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1305 {
1306   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1307   int         i,nz,nbs;
1308 
1309   PetscFunctionBegin;
1310   nz  = baij->maxnz/baij->bs2;
1311   nbs = baij->nbs;
1312   for (i=0; i<nz; i++) {
1313     baij->j[i] = indices[i];
1314   }
1315   baij->nz = nz;
1316   for (i=0; i<nbs; i++) {
1317     baij->ilen[i] = baij->imax[i];
1318   }
1319 
1320   PetscFunctionReturn(0);
1321 }
1322 EXTERN_C_END
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1326 /*@
1327     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1328        in the matrix.
1329 
1330   Input Parameters:
1331 +  mat - the SeqBAIJ matrix
1332 -  indices - the column indices
1333 
1334   Level: advanced
1335 
1336   Notes:
1337     This can be called if you have precomputed the nonzero structure of the
1338   matrix and want to provide it to the matrix object to improve the performance
1339   of the MatSetValues() operation.
1340 
1341     You MUST have set the correct numbers of nonzeros per row in the call to
1342   MatCreateSeqBAIJ().
1343 
1344     MUST be called before any calls to MatSetValues();
1345 
1346 @*/
1347 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1348 {
1349   int ierr,(*f)(Mat,int *);
1350 
1351   PetscFunctionBegin;
1352   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1353   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1354   if (f) {
1355     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1356   } else {
1357     SETERRQ(1,"Wrong type of matrix to set column indices");
1358   }
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 #undef __FUNCT__
1363 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1364 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1365 {
1366   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1367   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1368   PetscReal    atmp;
1369   PetscScalar  *x,zero = 0.0;
1370   MatScalar    *aa;
1371   int          ncols,brow,krow,kcol;
1372 
1373   PetscFunctionBegin;
1374   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1375   bs   = a->bs;
1376   aa   = a->a;
1377   ai   = a->i;
1378   aj   = a->j;
1379   mbs = a->mbs;
1380 
1381   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1382   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1383   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1384   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1385   for (i=0; i<mbs; i++) {
1386     ncols = ai[1] - ai[0]; ai++;
1387     brow  = bs*i;
1388     for (j=0; j<ncols; j++){
1389       /* bcol = bs*(*aj); */
1390       for (kcol=0; kcol<bs; kcol++){
1391         for (krow=0; krow<bs; krow++){
1392           atmp = PetscAbsScalar(*aa); aa++;
1393           row = brow + krow;    /* row index */
1394           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1395           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1396         }
1397       }
1398       aj++;
1399     }
1400   }
1401   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1407 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1408 {
1409   int        ierr;
1410 
1411   PetscFunctionBegin;
1412   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1418 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1419 {
1420   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1421   PetscFunctionBegin;
1422   *array = a->a;
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 #undef __FUNCT__
1427 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1428 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1429 {
1430   PetscFunctionBegin;
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 #include "petscblaslapack.h"
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1437 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1438 {
1439   Mat_SeqBAIJ  *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1440   int          ierr,one=1,i,bs=y->bs,j,bs2;
1441 
1442   PetscFunctionBegin;
1443   if (str == SAME_NONZERO_PATTERN) {
1444     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1445   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1446     if (y->xtoy && y->XtoY != X) {
1447       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1448       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1449     }
1450     if (!y->xtoy) { /* get xtoy */
1451       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1452       y->XtoY = X;
1453     }
1454     bs2 = bs*bs;
1455     for (i=0; i<x->nz; i++) {
1456       j = 0;
1457       while (j < bs2){
1458         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1459         j++;
1460       }
1461     }
1462     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1463   } else {
1464     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1465   }
1466   PetscFunctionReturn(0);
1467 }
1468 
1469 /* -------------------------------------------------------------------*/
1470 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1471        MatGetRow_SeqBAIJ,
1472        MatRestoreRow_SeqBAIJ,
1473        MatMult_SeqBAIJ_N,
1474 /* 4*/ MatMultAdd_SeqBAIJ_N,
1475        MatMultTranspose_SeqBAIJ,
1476        MatMultTransposeAdd_SeqBAIJ,
1477        MatSolve_SeqBAIJ_N,
1478        0,
1479        0,
1480 /*10*/ 0,
1481        MatLUFactor_SeqBAIJ,
1482        0,
1483        0,
1484        MatTranspose_SeqBAIJ,
1485 /*15*/ MatGetInfo_SeqBAIJ,
1486        MatEqual_SeqBAIJ,
1487        MatGetDiagonal_SeqBAIJ,
1488        MatDiagonalScale_SeqBAIJ,
1489        MatNorm_SeqBAIJ,
1490 /*20*/ 0,
1491        MatAssemblyEnd_SeqBAIJ,
1492        0,
1493        MatSetOption_SeqBAIJ,
1494        MatZeroEntries_SeqBAIJ,
1495 /*25*/ MatZeroRows_SeqBAIJ,
1496        MatLUFactorSymbolic_SeqBAIJ,
1497        MatLUFactorNumeric_SeqBAIJ_N,
1498        0,
1499        0,
1500 /*30*/ MatSetUpPreallocation_SeqBAIJ,
1501        MatILUFactorSymbolic_SeqBAIJ,
1502        0,
1503        MatGetArray_SeqBAIJ,
1504        MatRestoreArray_SeqBAIJ,
1505 /*35*/ MatDuplicate_SeqBAIJ,
1506        0,
1507        0,
1508        MatILUFactor_SeqBAIJ,
1509        0,
1510 /*40*/ MatAXPY_SeqBAIJ,
1511        MatGetSubMatrices_SeqBAIJ,
1512        MatIncreaseOverlap_SeqBAIJ,
1513        MatGetValues_SeqBAIJ,
1514        0,
1515 /*45*/ MatPrintHelp_SeqBAIJ,
1516        MatScale_SeqBAIJ,
1517        0,
1518        0,
1519        0,
1520 /*50*/ MatGetBlockSize_SeqBAIJ,
1521        MatGetRowIJ_SeqBAIJ,
1522        MatRestoreRowIJ_SeqBAIJ,
1523        0,
1524        0,
1525 /*55*/ 0,
1526        0,
1527        0,
1528        0,
1529        MatSetValuesBlocked_SeqBAIJ,
1530 /*60*/ MatGetSubMatrix_SeqBAIJ,
1531        MatDestroy_SeqBAIJ,
1532        MatView_SeqBAIJ,
1533        MatGetPetscMaps_Petsc,
1534        0,
1535 /*65*/ 0,
1536        0,
1537        0,
1538        0,
1539        0,
1540 /*70*/ MatGetRowMax_SeqBAIJ,
1541        MatConvert_Basic,
1542        0,
1543        0,
1544        0,
1545 /*75*/ 0,
1546        0,
1547        0,
1548        0,
1549        0,
1550 /*80*/ 0,
1551        0,
1552        0,
1553        0,
1554        0,
1555 /*85*/ MatLoad_SeqBAIJ
1556 };
1557 
1558 EXTERN_C_BEGIN
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1561 int MatStoreValues_SeqBAIJ(Mat mat)
1562 {
1563   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1564   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1565   int         ierr;
1566 
1567   PetscFunctionBegin;
1568   if (aij->nonew != 1) {
1569     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1570   }
1571 
1572   /* allocate space for values if not already there */
1573   if (!aij->saved_values) {
1574     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1575   }
1576 
1577   /* copy values over */
1578   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1579   PetscFunctionReturn(0);
1580 }
1581 EXTERN_C_END
1582 
1583 EXTERN_C_BEGIN
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1586 int MatRetrieveValues_SeqBAIJ(Mat mat)
1587 {
1588   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1589   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1590 
1591   PetscFunctionBegin;
1592   if (aij->nonew != 1) {
1593     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1594   }
1595   if (!aij->saved_values) {
1596     SETERRQ(1,"Must call MatStoreValues(A);first");
1597   }
1598 
1599   /* copy values over */
1600   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1601   PetscFunctionReturn(0);
1602 }
1603 EXTERN_C_END
1604 
1605 EXTERN_C_BEGIN
1606 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1607 EXTERN_C_END
1608 
1609 EXTERN_C_BEGIN
1610 #undef __FUNCT__
1611 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
1612 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz)
1613 {
1614   Mat_SeqBAIJ *b;
1615   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1616   PetscTruth  flg;
1617 
1618   PetscFunctionBegin;
1619 
1620   B->preallocated = PETSC_TRUE;
1621   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1622   if (nnz && newbs != bs) {
1623     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1624   }
1625   bs   = newbs;
1626 
1627   mbs  = B->m/bs;
1628   nbs  = B->n/bs;
1629   bs2  = bs*bs;
1630 
1631   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1632     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1633   }
1634 
1635   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1636   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1637   if (nnz) {
1638     for (i=0; i<mbs; i++) {
1639       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1640       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1641     }
1642   }
1643 
1644   b       = (Mat_SeqBAIJ*)B->data;
1645   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1646   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1647   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1648   if (!flg) {
1649     switch (bs) {
1650     case 1:
1651       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1652       B->ops->mult            = MatMult_SeqBAIJ_1;
1653       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1654       break;
1655     case 2:
1656       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1657       B->ops->mult            = MatMult_SeqBAIJ_2;
1658       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1659       break;
1660     case 3:
1661       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1662       B->ops->mult            = MatMult_SeqBAIJ_3;
1663       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1664       break;
1665     case 4:
1666       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1667       B->ops->mult            = MatMult_SeqBAIJ_4;
1668       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1669       break;
1670     case 5:
1671       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1672       B->ops->mult            = MatMult_SeqBAIJ_5;
1673       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1674       break;
1675     case 6:
1676       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1677       B->ops->mult            = MatMult_SeqBAIJ_6;
1678       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1679       break;
1680     case 7:
1681       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1682       B->ops->mult            = MatMult_SeqBAIJ_7;
1683       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1684       break;
1685     default:
1686       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1687       B->ops->mult            = MatMult_SeqBAIJ_N;
1688       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1689       break;
1690     }
1691   }
1692   b->bs      = bs;
1693   b->mbs     = mbs;
1694   b->nbs     = nbs;
1695   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1696   if (!nnz) {
1697     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1698     else if (nz <= 0)        nz = 1;
1699     for (i=0; i<mbs; i++) b->imax[i] = nz;
1700     nz = nz*mbs;
1701   } else {
1702     nz = 0;
1703     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1704   }
1705 
1706   /* allocate the matrix space */
1707   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1708   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1709   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1710   b->j  = (int*)(b->a + nz*bs2);
1711   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1712   b->i  = b->j + nz;
1713   b->singlemalloc = PETSC_TRUE;
1714 
1715   b->i[0] = 0;
1716   for (i=1; i<mbs+1; i++) {
1717     b->i[i] = b->i[i-1] + b->imax[i-1];
1718   }
1719 
1720   /* b->ilen will count nonzeros in each block row so far. */
1721   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1722   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1723   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1724 
1725   b->bs               = bs;
1726   b->bs2              = bs2;
1727   b->mbs              = mbs;
1728   b->nz               = 0;
1729   b->maxnz            = nz*bs2;
1730   B->info.nz_unneeded = (PetscReal)b->maxnz;
1731   PetscFunctionReturn(0);
1732 }
1733 EXTERN_C_END
1734 
1735 /*MC
1736    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
1737    block sparse compressed row format.
1738 
1739    Options Database Keys:
1740 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
1741 
1742   Level: beginner
1743 
1744 .seealso: MatCreateSeqBAIJ
1745 M*/
1746 
1747 EXTERN_C_BEGIN
1748 #undef __FUNCT__
1749 #define __FUNCT__ "MatCreate_SeqBAIJ"
1750 int MatCreate_SeqBAIJ(Mat B)
1751 {
1752   int         ierr,size;
1753   Mat_SeqBAIJ *b;
1754 
1755   PetscFunctionBegin;
1756   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1757   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1758 
1759   B->m = B->M = PetscMax(B->m,B->M);
1760   B->n = B->N = PetscMax(B->n,B->N);
1761   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1762   B->data = (void*)b;
1763   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1764   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1765   B->factor           = 0;
1766   B->lupivotthreshold = 1.0;
1767   B->mapping          = 0;
1768   b->row              = 0;
1769   b->col              = 0;
1770   b->icol             = 0;
1771   b->reallocs         = 0;
1772   b->saved_values     = 0;
1773 #if defined(PETSC_USE_MAT_SINGLE)
1774   b->setvalueslen     = 0;
1775   b->setvaluescopy    = PETSC_NULL;
1776 #endif
1777 
1778   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1779   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1780 
1781   b->sorted           = PETSC_FALSE;
1782   b->roworiented      = PETSC_TRUE;
1783   b->nonew            = 0;
1784   b->diag             = 0;
1785   b->solve_work       = 0;
1786   b->mult_work        = 0;
1787   B->spptr            = 0;
1788   B->info.nz_unneeded = (PetscReal)b->maxnz;
1789   b->keepzeroedrows   = PETSC_FALSE;
1790   b->xtoy              = 0;
1791   b->XtoY              = 0;
1792 
1793   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1794                                      "MatStoreValues_SeqBAIJ",
1795                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1796   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1797                                      "MatRetrieveValues_SeqBAIJ",
1798                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1799   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1800                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1801                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1802   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
1803                                      "MatConvert_SeqBAIJ_SeqAIJ",
1804                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1805   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
1806                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
1807                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
1808   PetscFunctionReturn(0);
1809 }
1810 EXTERN_C_END
1811 
1812 #undef __FUNCT__
1813 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1814 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1815 {
1816   Mat         C;
1817   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1818   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1819 
1820   PetscFunctionBegin;
1821   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1822 
1823   *B = 0;
1824   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1825   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1826   c    = (Mat_SeqBAIJ*)C->data;
1827 
1828   c->bs         = a->bs;
1829   c->bs2        = a->bs2;
1830   c->mbs        = a->mbs;
1831   c->nbs        = a->nbs;
1832   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1833 
1834   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1835   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1836   for (i=0; i<mbs; i++) {
1837     c->imax[i] = a->imax[i];
1838     c->ilen[i] = a->ilen[i];
1839   }
1840 
1841   /* allocate the matrix space */
1842   c->singlemalloc = PETSC_TRUE;
1843   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1844   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1845   c->j = (int*)(c->a + nz*bs2);
1846   c->i = c->j + nz;
1847   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1848   if (mbs > 0) {
1849     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1850     if (cpvalues == MAT_COPY_VALUES) {
1851       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1852     } else {
1853       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1854     }
1855   }
1856 
1857   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1858   c->sorted      = a->sorted;
1859   c->roworiented = a->roworiented;
1860   c->nonew       = a->nonew;
1861 
1862   if (a->diag) {
1863     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1864     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1865     for (i=0; i<mbs; i++) {
1866       c->diag[i] = a->diag[i];
1867     }
1868   } else c->diag        = 0;
1869   c->nz                 = a->nz;
1870   c->maxnz              = a->maxnz;
1871   c->solve_work         = 0;
1872   C->spptr              = 0;     /* Dangerous -I'm throwing away a->spptr */
1873   c->mult_work          = 0;
1874   C->preallocated       = PETSC_TRUE;
1875   C->assembled          = PETSC_TRUE;
1876   *B = C;
1877   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1878   PetscFunctionReturn(0);
1879 }
1880 
1881 #undef __FUNCT__
1882 #define __FUNCT__ "MatLoad_SeqBAIJ"
1883 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1884 {
1885   Mat_SeqBAIJ  *a;
1886   Mat          B;
1887   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1888   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1889   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1890   int          *masked,nmask,tmp,bs2,ishift;
1891   PetscScalar  *aa;
1892   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1893 
1894   PetscFunctionBegin;
1895   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1896   bs2  = bs*bs;
1897 
1898   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1899   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1900   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1901   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1902   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1903   M = header[1]; N = header[2]; nz = header[3];
1904 
1905   if (header[3] < 0) {
1906     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1907   }
1908 
1909   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1910 
1911   /*
1912      This code adds extra rows to make sure the number of rows is
1913     divisible by the blocksize
1914   */
1915   mbs        = M/bs;
1916   extra_rows = bs - M + bs*(mbs);
1917   if (extra_rows == bs) extra_rows = 0;
1918   else                  mbs++;
1919   if (extra_rows) {
1920     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1921   }
1922 
1923   /* read in row lengths */
1924   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1925   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1926   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1927 
1928   /* read in column indices */
1929   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1930   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1931   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1932 
1933   /* loop over row lengths determining block row lengths */
1934   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1935   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1936   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1937   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1938   masked   = mask + mbs;
1939   rowcount = 0; nzcount = 0;
1940   for (i=0; i<mbs; i++) {
1941     nmask = 0;
1942     for (j=0; j<bs; j++) {
1943       kmax = rowlengths[rowcount];
1944       for (k=0; k<kmax; k++) {
1945         tmp = jj[nzcount++]/bs;
1946         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1947       }
1948       rowcount++;
1949     }
1950     browlengths[i] += nmask;
1951     /* zero out the mask elements we set */
1952     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1953   }
1954 
1955   /* create our matrix */
1956   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
1957   ierr = MatSetType(B,type);CHKERRQ(ierr);
1958   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
1959   a = (Mat_SeqBAIJ*)B->data;
1960 
1961   /* set matrix "i" values */
1962   a->i[0] = 0;
1963   for (i=1; i<= mbs; i++) {
1964     a->i[i]      = a->i[i-1] + browlengths[i-1];
1965     a->ilen[i-1] = browlengths[i-1];
1966   }
1967   a->nz         = 0;
1968   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1969 
1970   /* read in nonzero values */
1971   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1972   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1973   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1974 
1975   /* set "a" and "j" values into matrix */
1976   nzcount = 0; jcount = 0;
1977   for (i=0; i<mbs; i++) {
1978     nzcountb = nzcount;
1979     nmask    = 0;
1980     for (j=0; j<bs; j++) {
1981       kmax = rowlengths[i*bs+j];
1982       for (k=0; k<kmax; k++) {
1983         tmp = jj[nzcount++]/bs;
1984 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1985       }
1986     }
1987     /* sort the masked values */
1988     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1989 
1990     /* set "j" values into matrix */
1991     maskcount = 1;
1992     for (j=0; j<nmask; j++) {
1993       a->j[jcount++]  = masked[j];
1994       mask[masked[j]] = maskcount++;
1995     }
1996     /* set "a" values into matrix */
1997     ishift = bs2*a->i[i];
1998     for (j=0; j<bs; j++) {
1999       kmax = rowlengths[i*bs+j];
2000       for (k=0; k<kmax; k++) {
2001         tmp       = jj[nzcountb]/bs ;
2002         block     = mask[tmp] - 1;
2003         point     = jj[nzcountb] - bs*tmp;
2004         idx       = ishift + bs2*block + j + bs*point;
2005         a->a[idx] = (MatScalar)aa[nzcountb++];
2006       }
2007     }
2008     /* zero out the mask elements we set */
2009     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2010   }
2011   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2012 
2013   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2014   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2015   ierr = PetscFree(aa);CHKERRQ(ierr);
2016   ierr = PetscFree(jj);CHKERRQ(ierr);
2017   ierr = PetscFree(mask);CHKERRQ(ierr);
2018 
2019   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2020   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2021   ierr = MatView_Private(B);CHKERRQ(ierr);
2022 
2023   *A = B;
2024   PetscFunctionReturn(0);
2025 }
2026 
2027 #undef __FUNCT__
2028 #define __FUNCT__ "MatCreateSeqBAIJ"
2029 /*@C
2030    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2031    compressed row) format.  For good matrix assembly performance the
2032    user should preallocate the matrix storage by setting the parameter nz
2033    (or the array nnz).  By setting these parameters accurately, performance
2034    during matrix assembly can be increased by more than a factor of 50.
2035 
2036    Collective on MPI_Comm
2037 
2038    Input Parameters:
2039 +  comm - MPI communicator, set to PETSC_COMM_SELF
2040 .  bs - size of block
2041 .  m - number of rows
2042 .  n - number of columns
2043 .  nz - number of nonzero blocks  per block row (same for all rows)
2044 -  nnz - array containing the number of nonzero blocks in the various block rows
2045          (possibly different for each block row) or PETSC_NULL
2046 
2047    Output Parameter:
2048 .  A - the matrix
2049 
2050    Options Database Keys:
2051 .   -mat_no_unroll - uses code that does not unroll the loops in the
2052                      block calculations (much slower)
2053 .    -mat_block_size - size of the blocks to use
2054 
2055    Level: intermediate
2056 
2057    Notes:
2058    A nonzero block is any block that as 1 or more nonzeros in it
2059 
2060    The block AIJ format is fully compatible with standard Fortran 77
2061    storage.  That is, the stored row and column indices can begin at
2062    either one (as in Fortran) or zero.  See the users' manual for details.
2063 
2064    Specify the preallocated storage with either nz or nnz (not both).
2065    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2066    allocation.  For additional details, see the users manual chapter on
2067    matrices.
2068 
2069 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2070 @*/
2071 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
2072 {
2073   int ierr;
2074 
2075   PetscFunctionBegin;
2076   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2077   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2078   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2079   PetscFunctionReturn(0);
2080 }
2081 
2082 #undef __FUNCT__
2083 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2084 /*@C
2085    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2086    per row in the matrix. For good matrix assembly performance the
2087    user should preallocate the matrix storage by setting the parameter nz
2088    (or the array nnz).  By setting these parameters accurately, performance
2089    during matrix assembly can be increased by more than a factor of 50.
2090 
2091    Collective on MPI_Comm
2092 
2093    Input Parameters:
2094 +  A - the matrix
2095 .  bs - size of block
2096 .  nz - number of block nonzeros per block row (same for all rows)
2097 -  nnz - array containing the number of block nonzeros in the various block rows
2098          (possibly different for each block row) or PETSC_NULL
2099 
2100    Options Database Keys:
2101 .   -mat_no_unroll - uses code that does not unroll the loops in the
2102                      block calculations (much slower)
2103 .    -mat_block_size - size of the blocks to use
2104 
2105    Level: intermediate
2106 
2107    Notes:
2108    The block AIJ format is fully compatible with standard Fortran 77
2109    storage.  That is, the stored row and column indices can begin at
2110    either one (as in Fortran) or zero.  See the users' manual for details.
2111 
2112    Specify the preallocated storage with either nz or nnz (not both).
2113    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2114    allocation.  For additional details, see the users manual chapter on
2115    matrices.
2116 
2117 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2118 @*/
2119 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[])
2120 {
2121   int ierr,(*f)(Mat,int,int,const int[]);
2122 
2123   PetscFunctionBegin;
2124   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2125   if (f) {
2126     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2127   }
2128   PetscFunctionReturn(0);
2129 }
2130