xref: /petsc/src/mat/impls/baij/seq/baij.c (revision a2ec6df8280ae283e8b0f45d20ebe04f5d3dfc7b)
1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "petscsys.h"                     /*I "petscmat.h" I*/
11 
12 /*
13     Special version for Fun3d sequential benchmark
14 */
15 #if defined(PETSC_HAVE_FORTRAN_CAPS)
16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
18 #define matsetvaluesblocked4_ matsetvaluesblocked4
19 #endif
20 
21 EXTERN_C_BEGIN
22 #undef __FUNCT__
23 #define __FUNCT__ "matsetvaluesblocked4_"
24 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[])
25 {
26   Mat               A = *AA;
27   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
28   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
29   int               *ai=a->i,*ailen=a->ilen;
30   int               *aj=a->j,stepval;
31   const PetscScalar *value = v;
32   MatScalar         *ap,*aa = a->a,*bap;
33 
34   PetscFunctionBegin;
35   stepval = (n-1)*4;
36   for (k=0; k<m; k++) { /* loop over added rows */
37     row  = im[k];
38     rp   = aj + ai[row];
39     ap   = aa + 16*ai[row];
40     nrow = ailen[row];
41     low  = 0;
42     for (l=0; l<n; l++) { /* loop over added columns */
43       col = in[l];
44       value = v + k*(stepval+4)*4 + l*4;
45       low = 0; high = nrow;
46       while (high-low > 7) {
47         t = (low+high)/2;
48         if (rp[t] > col) high = t;
49         else             low  = t;
50       }
51       for (i=low; i<high; i++) {
52         if (rp[i] > col) break;
53         if (rp[i] == col) {
54           bap  = ap +  16*i;
55           for (ii=0; ii<4; ii++,value+=stepval) {
56             for (jj=ii; jj<16; jj+=4) {
57               bap[jj] += *value++;
58             }
59           }
60           goto noinsert2;
61         }
62       }
63       N = nrow++ - 1;
64       /* shift up all the later entries in this row */
65       for (ii=N; ii>=i; ii--) {
66         rp[ii+1] = rp[ii];
67         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
68       }
69       if (N >= i) {
70         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
71       }
72       rp[i] = col;
73       bap   = ap +  16*i;
74       for (ii=0; ii<4; ii++,value+=stepval) {
75         for (jj=ii; jj<16; jj+=4) {
76           bap[jj] = *value++;
77         }
78       }
79       noinsert2:;
80       low = i;
81     }
82     ailen[row] = nrow;
83   }
84 }
85 EXTERN_C_END
86 
87 #if defined(PETSC_HAVE_FORTRAN_CAPS)
88 #define matsetvalues4_ MATSETVALUES4
89 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
90 #define matsetvalues4_ matsetvalues4
91 #endif
92 
93 EXTERN_C_BEGIN
94 #undef __FUNCT__
95 #define __FUNCT__ "MatSetValues4_"
96 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
97 {
98   Mat         A = *AA;
99   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
100   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
101   int         *ai=a->i,*ailen=a->ilen;
102   int         *aj=a->j,brow,bcol;
103   int         ridx,cidx;
104   MatScalar   *ap,value,*aa=a->a,*bap;
105 
106   PetscFunctionBegin;
107   for (k=0; k<m; k++) { /* loop over added rows */
108     row  = im[k]; brow = row/4;
109     rp   = aj + ai[brow];
110     ap   = aa + 16*ai[brow];
111     nrow = ailen[brow];
112     low  = 0;
113     for (l=0; l<n; l++) { /* loop over added columns */
114       col = in[l]; bcol = col/4;
115       ridx = row % 4; cidx = col % 4;
116       value = v[l + k*n];
117       low = 0; high = nrow;
118       while (high-low > 7) {
119         t = (low+high)/2;
120         if (rp[t] > bcol) high = t;
121         else              low  = t;
122       }
123       for (i=low; i<high; i++) {
124         if (rp[i] > bcol) break;
125         if (rp[i] == bcol) {
126           bap  = ap +  16*i + 4*cidx + ridx;
127           *bap += value;
128           goto noinsert1;
129         }
130       }
131       N = nrow++ - 1;
132       /* shift up all the later entries in this row */
133       for (ii=N; ii>=i; ii--) {
134         rp[ii+1] = rp[ii];
135         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
136       }
137       if (N>=i) {
138         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
139       }
140       rp[i]                    = bcol;
141       ap[16*i + 4*cidx + ridx] = value;
142       noinsert1:;
143       low = i;
144     }
145     ailen[brow] = nrow;
146   }
147 }
148 EXTERN_C_END
149 
150 /*  UGLY, ugly, ugly
151    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
152    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
153    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
154    converts the entries into single precision and then calls ..._MatScalar() to put them
155    into the single precision data structures.
156 */
157 #if defined(PETSC_USE_MAT_SINGLE)
158 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
159 #else
160 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
161 #endif
162 
163 #define CHUNKSIZE  10
164 
165 /*
166      Checks for missing diagonals
167 */
168 #undef __FUNCT__
169 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
170 int MatMissingDiagonal_SeqBAIJ(Mat A)
171 {
172   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
173   int         *diag,*jj = a->j,i,ierr;
174 
175   PetscFunctionBegin;
176   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
177   diag = a->diag;
178   for (i=0; i<a->mbs; i++) {
179     if (jj[diag[i]] != i) {
180       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
181     }
182   }
183   PetscFunctionReturn(0);
184 }
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
188 int MatMarkDiagonal_SeqBAIJ(Mat A)
189 {
190   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
191   int         i,j,*diag,m = a->mbs,ierr;
192 
193   PetscFunctionBegin;
194   if (a->diag) PetscFunctionReturn(0);
195 
196   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
197   PetscLogObjectMemory(A,(m+1)*sizeof(int));
198   for (i=0; i<m; i++) {
199     diag[i] = a->i[i+1];
200     for (j=a->i[i]; j<a->i[i+1]; j++) {
201       if (a->j[j] == i) {
202         diag[i] = j;
203         break;
204       }
205     }
206   }
207   a->diag = diag;
208   PetscFunctionReturn(0);
209 }
210 
211 
212 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
216 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
217 {
218   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
219   int         ierr,n = a->mbs,i;
220 
221   PetscFunctionBegin;
222   *nn = n;
223   if (!ia) PetscFunctionReturn(0);
224   if (symmetric) {
225     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
226   } else if (oshift == 1) {
227     /* temporarily add 1 to i and j indices */
228     int nz = a->i[n];
229     for (i=0; i<nz; i++) a->j[i]++;
230     for (i=0; i<n+1; i++) a->i[i]++;
231     *ia = a->i; *ja = a->j;
232   } else {
233     *ia = a->i; *ja = a->j;
234   }
235 
236   PetscFunctionReturn(0);
237 }
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
241 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
242 {
243   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
244   int         i,n = a->mbs,ierr;
245 
246   PetscFunctionBegin;
247   if (!ia) PetscFunctionReturn(0);
248   if (symmetric) {
249     ierr = PetscFree(*ia);CHKERRQ(ierr);
250     ierr = PetscFree(*ja);CHKERRQ(ierr);
251   } else if (oshift == 1) {
252     int nz = a->i[n]-1;
253     for (i=0; i<nz; i++) a->j[i]--;
254     for (i=0; i<n+1; i++) a->i[i]--;
255   }
256   PetscFunctionReturn(0);
257 }
258 
259 #undef __FUNCT__
260 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
261 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
262 {
263   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
264 
265   PetscFunctionBegin;
266   *bs = baij->bs;
267   PetscFunctionReturn(0);
268 }
269 
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatDestroy_SeqBAIJ"
273 int MatDestroy_SeqBAIJ(Mat A)
274 {
275   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
276   int         ierr;
277 
278   PetscFunctionBegin;
279 #if defined(PETSC_USE_LOG)
280   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
281 #endif
282   ierr = PetscFree(a->a);CHKERRQ(ierr);
283   if (!a->singlemalloc) {
284     ierr = PetscFree(a->i);CHKERRQ(ierr);
285     ierr = PetscFree(a->j);CHKERRQ(ierr);
286   }
287   if (a->row) {
288     ierr = ISDestroy(a->row);CHKERRQ(ierr);
289   }
290   if (a->col) {
291     ierr = ISDestroy(a->col);CHKERRQ(ierr);
292   }
293   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
294   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
295   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
296   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
297   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
298   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
299   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
300 #if defined(PETSC_USE_MAT_SINGLE)
301   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
302 #endif
303   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
304 
305   ierr = PetscFree(a);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatSetOption_SeqBAIJ"
311 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
312 {
313   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
314 
315   PetscFunctionBegin;
316   switch (op) {
317   case MAT_ROW_ORIENTED:
318     a->roworiented    = PETSC_TRUE;
319     break;
320   case MAT_COLUMN_ORIENTED:
321     a->roworiented    = PETSC_FALSE;
322     break;
323   case MAT_COLUMNS_SORTED:
324     a->sorted         = PETSC_TRUE;
325     break;
326   case MAT_COLUMNS_UNSORTED:
327     a->sorted         = PETSC_FALSE;
328     break;
329   case MAT_KEEP_ZEROED_ROWS:
330     a->keepzeroedrows = PETSC_TRUE;
331     break;
332   case MAT_NO_NEW_NONZERO_LOCATIONS:
333     a->nonew          = 1;
334     break;
335   case MAT_NEW_NONZERO_LOCATION_ERR:
336     a->nonew          = -1;
337     break;
338   case MAT_NEW_NONZERO_ALLOCATION_ERR:
339     a->nonew          = -2;
340     break;
341   case MAT_YES_NEW_NONZERO_LOCATIONS:
342     a->nonew          = 0;
343     break;
344   case MAT_ROWS_SORTED:
345   case MAT_ROWS_UNSORTED:
346   case MAT_YES_NEW_DIAGONALS:
347   case MAT_IGNORE_OFF_PROC_ENTRIES:
348   case MAT_USE_HASH_TABLE:
349     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
350     break;
351   case MAT_NO_NEW_DIAGONALS:
352     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
353   default:
354     SETERRQ(PETSC_ERR_SUP,"unknown option");
355   }
356   PetscFunctionReturn(0);
357 }
358 
359 #undef __FUNCT__
360 #define __FUNCT__ "MatGetRow_SeqBAIJ"
361 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
362 {
363   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
364   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
365   MatScalar    *aa,*aa_i;
366   PetscScalar  *v_i;
367 
368   PetscFunctionBegin;
369   bs  = a->bs;
370   ai  = a->i;
371   aj  = a->j;
372   aa  = a->a;
373   bs2 = a->bs2;
374 
375   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
376 
377   bn  = row/bs;   /* Block number */
378   bp  = row % bs; /* Block Position */
379   M   = ai[bn+1] - ai[bn];
380   *nz = bs*M;
381 
382   if (v) {
383     *v = 0;
384     if (*nz) {
385       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
386       for (i=0; i<M; i++) { /* for each block in the block row */
387         v_i  = *v + i*bs;
388         aa_i = aa + bs2*(ai[bn] + i);
389         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
390       }
391     }
392   }
393 
394   if (idx) {
395     *idx = 0;
396     if (*nz) {
397       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
398       for (i=0; i<M; i++) { /* for each block in the block row */
399         idx_i = *idx + i*bs;
400         itmp  = bs*aj[ai[bn] + i];
401         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
402       }
403     }
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
410 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
411 {
412   int ierr;
413 
414   PetscFunctionBegin;
415   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
416   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
417   PetscFunctionReturn(0);
418 }
419 
420 #undef __FUNCT__
421 #define __FUNCT__ "MatTranspose_SeqBAIJ"
422 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
423 {
424   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
425   Mat         C;
426   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
427   int         *rows,*cols,bs2=a->bs2;
428   PetscScalar *array;
429 
430   PetscFunctionBegin;
431   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
432   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
433   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
434 
435 #if defined(PETSC_USE_MAT_SINGLE)
436   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
437   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
438 #else
439   array = a->a;
440 #endif
441 
442   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
443   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
444   ierr = PetscFree(col);CHKERRQ(ierr);
445   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
446   cols = rows + bs;
447   for (i=0; i<mbs; i++) {
448     cols[0] = i*bs;
449     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
450     len = ai[i+1] - ai[i];
451     for (j=0; j<len; j++) {
452       rows[0] = (*aj++)*bs;
453       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
454       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
455       array += bs2;
456     }
457   }
458   ierr = PetscFree(rows);CHKERRQ(ierr);
459 #if defined(PETSC_USE_MAT_SINGLE)
460   ierr = PetscFree(array);
461 #endif
462 
463   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
464   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
465 
466   if (B) {
467     *B = C;
468   } else {
469     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
470   }
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
476 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
477 {
478   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
479   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
480   PetscScalar *aa;
481   FILE        *file;
482 
483   PetscFunctionBegin;
484   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
485   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
486   col_lens[0] = MAT_FILE_COOKIE;
487 
488   col_lens[1] = A->m;
489   col_lens[2] = A->n;
490   col_lens[3] = a->nz*bs2;
491 
492   /* store lengths of each row and write (including header) to file */
493   count = 0;
494   for (i=0; i<a->mbs; i++) {
495     for (j=0; j<bs; j++) {
496       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
497     }
498   }
499   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
500   ierr = PetscFree(col_lens);CHKERRQ(ierr);
501 
502   /* store column indices (zero start index) */
503   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
504   count = 0;
505   for (i=0; i<a->mbs; i++) {
506     for (j=0; j<bs; j++) {
507       for (k=a->i[i]; k<a->i[i+1]; k++) {
508         for (l=0; l<bs; l++) {
509           jj[count++] = bs*a->j[k] + l;
510         }
511       }
512     }
513   }
514   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
515   ierr = PetscFree(jj);CHKERRQ(ierr);
516 
517   /* store nonzero values */
518   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
519   count = 0;
520   for (i=0; i<a->mbs; i++) {
521     for (j=0; j<bs; j++) {
522       for (k=a->i[i]; k<a->i[i+1]; k++) {
523         for (l=0; l<bs; l++) {
524           aa[count++] = a->a[bs2*k + l*bs + j];
525         }
526       }
527     }
528   }
529   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
530   ierr = PetscFree(aa);CHKERRQ(ierr);
531 
532   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
533   if (file) {
534     fprintf(file,"-matload_block_size %d\n",a->bs);
535   }
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
541 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
542 {
543   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
544   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
545   PetscViewerFormat format;
546 
547   PetscFunctionBegin;
548   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
549   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
550     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
551   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
552     Mat aij;
553     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
554     ierr = MatView(aij,viewer);CHKERRQ(ierr);
555     ierr = MatDestroy(aij);CHKERRQ(ierr);
556   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
557      PetscFunctionReturn(0);
558   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
559     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
560     for (i=0; i<a->mbs; i++) {
561       for (j=0; j<bs; j++) {
562         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
563         for (k=a->i[i]; k<a->i[i+1]; k++) {
564           for (l=0; l<bs; l++) {
565 #if defined(PETSC_USE_COMPLEX)
566             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
567               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
568                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
569             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
570               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
571                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
572             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
573               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
574             }
575 #else
576             if (a->a[bs2*k + l*bs + j] != 0.0) {
577               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
578             }
579 #endif
580           }
581         }
582         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
583       }
584     }
585     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
586   } else {
587     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
588     for (i=0; i<a->mbs; i++) {
589       for (j=0; j<bs; j++) {
590         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
591         for (k=a->i[i]; k<a->i[i+1]; k++) {
592           for (l=0; l<bs; l++) {
593 #if defined(PETSC_USE_COMPLEX)
594             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
595               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
596                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
597             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
598               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
599                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
600             } else {
601               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
602             }
603 #else
604             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
605 #endif
606           }
607         }
608         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
609       }
610     }
611     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
612   }
613   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
614   PetscFunctionReturn(0);
615 }
616 
617 #undef __FUNCT__
618 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
619 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
620 {
621   Mat          A = (Mat) Aa;
622   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
623   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
624   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
625   MatScalar    *aa;
626   PetscViewer  viewer;
627 
628   PetscFunctionBegin;
629 
630   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
631   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
632 
633   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
634 
635   /* loop over matrix elements drawing boxes */
636   color = PETSC_DRAW_BLUE;
637   for (i=0,row=0; i<mbs; i++,row+=bs) {
638     for (j=a->i[i]; j<a->i[i+1]; j++) {
639       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
640       x_l = a->j[j]*bs; x_r = x_l + 1.0;
641       aa = a->a + j*bs2;
642       for (k=0; k<bs; k++) {
643         for (l=0; l<bs; l++) {
644           if (PetscRealPart(*aa++) >=  0.) continue;
645           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
646         }
647       }
648     }
649   }
650   color = PETSC_DRAW_CYAN;
651   for (i=0,row=0; i<mbs; i++,row+=bs) {
652     for (j=a->i[i]; j<a->i[i+1]; j++) {
653       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
654       x_l = a->j[j]*bs; x_r = x_l + 1.0;
655       aa = a->a + j*bs2;
656       for (k=0; k<bs; k++) {
657         for (l=0; l<bs; l++) {
658           if (PetscRealPart(*aa++) != 0.) continue;
659           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
660         }
661       }
662     }
663   }
664 
665   color = PETSC_DRAW_RED;
666   for (i=0,row=0; i<mbs; i++,row+=bs) {
667     for (j=a->i[i]; j<a->i[i+1]; j++) {
668       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
669       x_l = a->j[j]*bs; x_r = x_l + 1.0;
670       aa = a->a + j*bs2;
671       for (k=0; k<bs; k++) {
672         for (l=0; l<bs; l++) {
673           if (PetscRealPart(*aa++) <= 0.) continue;
674           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
675         }
676       }
677     }
678   }
679   PetscFunctionReturn(0);
680 }
681 
682 #undef __FUNCT__
683 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
684 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
685 {
686   int          ierr;
687   PetscReal    xl,yl,xr,yr,w,h;
688   PetscDraw    draw;
689   PetscTruth   isnull;
690 
691   PetscFunctionBegin;
692 
693   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
694   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
695 
696   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
697   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
698   xr += w;    yr += h;  xl = -w;     yl = -h;
699   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
700   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
701   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
702   PetscFunctionReturn(0);
703 }
704 
705 #undef __FUNCT__
706 #define __FUNCT__ "MatView_SeqBAIJ"
707 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
708 {
709   int        ierr;
710   PetscTruth issocket,isascii,isbinary,isdraw;
711 
712   PetscFunctionBegin;
713   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
714   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
715   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
716   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
717   if (issocket) {
718     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
719   } else if (isascii){
720     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
721   } else if (isbinary) {
722     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
723   } else if (isdraw) {
724     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
725   } else {
726     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
727   }
728   PetscFunctionReturn(0);
729 }
730 
731 
732 #undef __FUNCT__
733 #define __FUNCT__ "MatGetValues_SeqBAIJ"
734 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
735 {
736   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
737   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
738   int        *ai = a->i,*ailen = a->ilen;
739   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
740   MatScalar  *ap,*aa = a->a,zero = 0.0;
741 
742   PetscFunctionBegin;
743   for (k=0; k<m; k++) { /* loop over rows */
744     row  = im[k]; brow = row/bs;
745     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
746     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
747     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
748     nrow = ailen[brow];
749     for (l=0; l<n; l++) { /* loop over columns */
750       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
751       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
752       col  = in[l] ;
753       bcol = col/bs;
754       cidx = col%bs;
755       ridx = row%bs;
756       high = nrow;
757       low  = 0; /* assume unsorted */
758       while (high-low > 5) {
759         t = (low+high)/2;
760         if (rp[t] > bcol) high = t;
761         else             low  = t;
762       }
763       for (i=low; i<high; i++) {
764         if (rp[i] > bcol) break;
765         if (rp[i] == bcol) {
766           *v++ = ap[bs2*i+bs*cidx+ridx];
767           goto finished;
768         }
769       }
770       *v++ = zero;
771       finished:;
772     }
773   }
774   PetscFunctionReturn(0);
775 }
776 
777 #if defined(PETSC_USE_MAT_SINGLE)
778 #undef __FUNCT__
779 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
780 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
781 {
782   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
783   int         ierr,i,N = m*n*b->bs2;
784   MatScalar   *vsingle;
785 
786   PetscFunctionBegin;
787   if (N > b->setvalueslen) {
788     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
789     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
790     b->setvalueslen  = N;
791   }
792   vsingle = b->setvaluescopy;
793   for (i=0; i<N; i++) {
794     vsingle[i] = v[i];
795   }
796   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
797   PetscFunctionReturn(0);
798 }
799 #endif
800 
801 
802 #undef __FUNCT__
803 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
804 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is)
805 {
806   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
807   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
808   int               *imax=a->imax,*ai=a->i,*ailen=a->ilen;
809   int               *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
810   PetscTruth        roworiented=a->roworiented;
811   const MatScalar   *value = v;
812   MatScalar         *ap,*aa = a->a,*bap;
813 
814   PetscFunctionBegin;
815   if (roworiented) {
816     stepval = (n-1)*bs;
817   } else {
818     stepval = (m-1)*bs;
819   }
820   for (k=0; k<m; k++) { /* loop over added rows */
821     row  = im[k];
822     if (row < 0) continue;
823 #if defined(PETSC_USE_BOPT_g)
824     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
825 #endif
826     rp   = aj + ai[row];
827     ap   = aa + bs2*ai[row];
828     rmax = imax[row];
829     nrow = ailen[row];
830     low  = 0;
831     for (l=0; l<n; l++) { /* loop over added columns */
832       if (in[l] < 0) continue;
833 #if defined(PETSC_USE_BOPT_g)
834       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1);
835 #endif
836       col = in[l];
837       if (roworiented) {
838         value = v + k*(stepval+bs)*bs + l*bs;
839       } else {
840         value = v + l*(stepval+bs)*bs + k*bs;
841       }
842       if (!sorted) low = 0; high = nrow;
843       while (high-low > 7) {
844         t = (low+high)/2;
845         if (rp[t] > col) high = t;
846         else             low  = t;
847       }
848       for (i=low; i<high; i++) {
849         if (rp[i] > col) break;
850         if (rp[i] == col) {
851           bap  = ap +  bs2*i;
852           if (roworiented) {
853             if (is == ADD_VALUES) {
854               for (ii=0; ii<bs; ii++,value+=stepval) {
855                 for (jj=ii; jj<bs2; jj+=bs) {
856                   bap[jj] += *value++;
857                 }
858               }
859             } else {
860               for (ii=0; ii<bs; ii++,value+=stepval) {
861                 for (jj=ii; jj<bs2; jj+=bs) {
862                   bap[jj] = *value++;
863                 }
864               }
865             }
866           } else {
867             if (is == ADD_VALUES) {
868               for (ii=0; ii<bs; ii++,value+=stepval) {
869                 for (jj=0; jj<bs; jj++) {
870                   *bap++ += *value++;
871                 }
872               }
873             } else {
874               for (ii=0; ii<bs; ii++,value+=stepval) {
875                 for (jj=0; jj<bs; jj++) {
876                   *bap++  = *value++;
877                 }
878               }
879             }
880           }
881           goto noinsert2;
882         }
883       }
884       if (nonew == 1) goto noinsert2;
885       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
886       if (nrow >= rmax) {
887         /* there is no extra room in row, therefore enlarge */
888         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
889         MatScalar *new_a;
890 
891         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
892 
893         /* malloc new storage space */
894         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
895 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
896         new_j   = (int*)(new_a + bs2*new_nz);
897         new_i   = new_j + new_nz;
898 
899         /* copy over old data into new slots */
900         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
901         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
902         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
903         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
904         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
905         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
906         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
907         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
908         /* free up old matrix storage */
909         ierr = PetscFree(a->a);CHKERRQ(ierr);
910         if (!a->singlemalloc) {
911           ierr = PetscFree(a->i);CHKERRQ(ierr);
912           ierr = PetscFree(a->j);CHKERRQ(ierr);
913         }
914         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
915         a->singlemalloc = PETSC_TRUE;
916 
917         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
918         rmax = imax[row] = imax[row] + CHUNKSIZE;
919         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
920         a->maxnz += bs2*CHUNKSIZE;
921         a->reallocs++;
922         a->nz++;
923       }
924       N = nrow++ - 1;
925       /* shift up all the later entries in this row */
926       for (ii=N; ii>=i; ii--) {
927         rp[ii+1] = rp[ii];
928         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
929       }
930       if (N >= i) {
931         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
932       }
933       rp[i] = col;
934       bap   = ap +  bs2*i;
935       if (roworiented) {
936         for (ii=0; ii<bs; ii++,value+=stepval) {
937           for (jj=ii; jj<bs2; jj+=bs) {
938             bap[jj] = *value++;
939           }
940         }
941       } else {
942         for (ii=0; ii<bs; ii++,value+=stepval) {
943           for (jj=0; jj<bs; jj++) {
944             *bap++  = *value++;
945           }
946         }
947       }
948       noinsert2:;
949       low = i;
950     }
951     ailen[row] = nrow;
952   }
953   PetscFunctionReturn(0);
954 }
955 
956 #undef __FUNCT__
957 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
958 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
959 {
960   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
961   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
962   int        m = A->m,*ip,N,*ailen = a->ilen;
963   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
964   MatScalar  *aa = a->a,*ap;
965 
966   PetscFunctionBegin;
967   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
968 
969   if (m) rmax = ailen[0];
970   for (i=1; i<mbs; i++) {
971     /* move each row back by the amount of empty slots (fshift) before it*/
972     fshift += imax[i-1] - ailen[i-1];
973     rmax   = PetscMax(rmax,ailen[i]);
974     if (fshift) {
975       ip = aj + ai[i]; ap = aa + bs2*ai[i];
976       N = ailen[i];
977       for (j=0; j<N; j++) {
978         ip[j-fshift] = ip[j];
979         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
980       }
981     }
982     ai[i] = ai[i-1] + ailen[i-1];
983   }
984   if (mbs) {
985     fshift += imax[mbs-1] - ailen[mbs-1];
986     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
987   }
988   /* reset ilen and imax for each row */
989   for (i=0; i<mbs; i++) {
990     ailen[i] = imax[i] = ai[i+1] - ai[i];
991   }
992   a->nz = ai[mbs];
993 
994   /* diagonals may have moved, so kill the diagonal pointers */
995   if (fshift && a->diag) {
996     ierr = PetscFree(a->diag);CHKERRQ(ierr);
997     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
998     a->diag = 0;
999   }
1000   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1001   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1002   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1003   a->reallocs          = 0;
1004   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1005 
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 
1010 
1011 /*
1012    This function returns an array of flags which indicate the locations of contiguous
1013    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1014    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1015    Assume: sizes should be long enough to hold all the values.
1016 */
1017 #undef __FUNCT__
1018 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1019 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1020 {
1021   int        i,j,k,row;
1022   PetscTruth flg;
1023 
1024   PetscFunctionBegin;
1025   for (i=0,j=0; i<n; j++) {
1026     row = idx[i];
1027     if (row%bs!=0) { /* Not the begining of a block */
1028       sizes[j] = 1;
1029       i++;
1030     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1031       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1032       i++;
1033     } else { /* Begining of the block, so check if the complete block exists */
1034       flg = PETSC_TRUE;
1035       for (k=1; k<bs; k++) {
1036         if (row+k != idx[i+k]) { /* break in the block */
1037           flg = PETSC_FALSE;
1038           break;
1039         }
1040       }
1041       if (flg == PETSC_TRUE) { /* No break in the bs */
1042         sizes[j] = bs;
1043         i+= bs;
1044       } else {
1045         sizes[j] = 1;
1046         i++;
1047       }
1048     }
1049   }
1050   *bs_max = j;
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 #undef __FUNCT__
1055 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1056 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1057 {
1058   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1059   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
1060   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1061   PetscScalar zero = 0.0;
1062   MatScalar   *aa;
1063 
1064   PetscFunctionBegin;
1065   /* Make a copy of the IS and  sort it */
1066   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1067   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1068 
1069   /* allocate memory for rows,sizes */
1070   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
1071   sizes = rows + is_n;
1072 
1073   /* copy IS values to rows, and sort them */
1074   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1075   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1076   if (baij->keepzeroedrows) {
1077     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1078     bs_max = is_n;
1079   } else {
1080     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1081   }
1082   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1083 
1084   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1085     row   = rows[j];
1086     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1087     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1088     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1089     if (sizes[i] == bs && !baij->keepzeroedrows) {
1090       if (diag) {
1091         if (baij->ilen[row/bs] > 0) {
1092           baij->ilen[row/bs]       = 1;
1093           baij->j[baij->i[row/bs]] = row/bs;
1094           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1095         }
1096         /* Now insert all the diagonal values for this bs */
1097         for (k=0; k<bs; k++) {
1098           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1099         }
1100       } else { /* (!diag) */
1101         baij->ilen[row/bs] = 0;
1102       } /* end (!diag) */
1103     } else { /* (sizes[i] != bs) */
1104 #if defined (PETSC_USE_DEBUG)
1105       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1106 #endif
1107       for (k=0; k<count; k++) {
1108         aa[0] =  zero;
1109         aa    += bs;
1110       }
1111       if (diag) {
1112         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1113       }
1114     }
1115   }
1116 
1117   ierr = PetscFree(rows);CHKERRQ(ierr);
1118   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1124 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
1125 {
1126   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1127   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1128   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1129   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1130   int         ridx,cidx,bs2=a->bs2,ierr;
1131   PetscTruth  roworiented=a->roworiented;
1132   MatScalar   *ap,value,*aa=a->a,*bap;
1133 
1134   PetscFunctionBegin;
1135   for (k=0; k<m; k++) { /* loop over added rows */
1136     row  = im[k]; brow = row/bs;
1137     if (row < 0) continue;
1138 #if defined(PETSC_USE_BOPT_g)
1139     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
1140 #endif
1141     rp   = aj + ai[brow];
1142     ap   = aa + bs2*ai[brow];
1143     rmax = imax[brow];
1144     nrow = ailen[brow];
1145     low  = 0;
1146     for (l=0; l<n; l++) { /* loop over added columns */
1147       if (in[l] < 0) continue;
1148 #if defined(PETSC_USE_BOPT_g)
1149       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
1150 #endif
1151       col = in[l]; bcol = col/bs;
1152       ridx = row % bs; cidx = col % bs;
1153       if (roworiented) {
1154         value = v[l + k*n];
1155       } else {
1156         value = v[k + l*m];
1157       }
1158       if (!sorted) low = 0; high = nrow;
1159       while (high-low > 7) {
1160         t = (low+high)/2;
1161         if (rp[t] > bcol) high = t;
1162         else              low  = t;
1163       }
1164       for (i=low; i<high; i++) {
1165         if (rp[i] > bcol) break;
1166         if (rp[i] == bcol) {
1167           bap  = ap +  bs2*i + bs*cidx + ridx;
1168           if (is == ADD_VALUES) *bap += value;
1169           else                  *bap  = value;
1170           goto noinsert1;
1171         }
1172       }
1173       if (nonew == 1) goto noinsert1;
1174       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1175       if (nrow >= rmax) {
1176         /* there is no extra room in row, therefore enlarge */
1177         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1178         MatScalar *new_a;
1179 
1180         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1181 
1182         /* Malloc new storage space */
1183         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1184 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1185         new_j   = (int*)(new_a + bs2*new_nz);
1186         new_i   = new_j + new_nz;
1187 
1188         /* copy over old data into new slots */
1189         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1190         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1191         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1192         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1193         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1194         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1195         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1196         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1197         /* free up old matrix storage */
1198         ierr = PetscFree(a->a);CHKERRQ(ierr);
1199         if (!a->singlemalloc) {
1200           ierr = PetscFree(a->i);CHKERRQ(ierr);
1201           ierr = PetscFree(a->j);CHKERRQ(ierr);
1202         }
1203         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1204         a->singlemalloc = PETSC_TRUE;
1205 
1206         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1207         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1208         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1209         a->maxnz += bs2*CHUNKSIZE;
1210         a->reallocs++;
1211         a->nz++;
1212       }
1213       N = nrow++ - 1;
1214       /* shift up all the later entries in this row */
1215       for (ii=N; ii>=i; ii--) {
1216         rp[ii+1] = rp[ii];
1217         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1218       }
1219       if (N>=i) {
1220         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1221       }
1222       rp[i]                      = bcol;
1223       ap[bs2*i + bs*cidx + ridx] = value;
1224       noinsert1:;
1225       low = i;
1226     }
1227     ailen[brow] = nrow;
1228   }
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1235 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1236 {
1237   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1238   Mat         outA;
1239   int         ierr;
1240   PetscTruth  row_identity,col_identity;
1241 
1242   PetscFunctionBegin;
1243   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1244   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1245   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1246   if (!row_identity || !col_identity) {
1247     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1248   }
1249 
1250   outA          = inA;
1251   inA->factor   = FACTOR_LU;
1252 
1253   if (!a->diag) {
1254     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1255   }
1256 
1257   a->row        = row;
1258   a->col        = col;
1259   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1260   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1261 
1262   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1263   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1264   PetscLogObjectParent(inA,a->icol);
1265 
1266   /*
1267       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1268       for ILU(0) factorization with natural ordering
1269   */
1270   if (a->bs < 8) {
1271     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1272   } else {
1273     if (!a->solve_work) {
1274       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1275       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1276     }
1277   }
1278 
1279   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1280 
1281   PetscFunctionReturn(0);
1282 }
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1285 int MatPrintHelp_SeqBAIJ(Mat A)
1286 {
1287   static PetscTruth called = PETSC_FALSE;
1288   MPI_Comm          comm = A->comm;
1289   int               ierr;
1290 
1291   PetscFunctionBegin;
1292   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1293   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1294   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 EXTERN_C_BEGIN
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1301 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1302 {
1303   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1304   int         i,nz,nbs;
1305 
1306   PetscFunctionBegin;
1307   nz  = baij->maxnz/baij->bs2;
1308   nbs = baij->nbs;
1309   for (i=0; i<nz; i++) {
1310     baij->j[i] = indices[i];
1311   }
1312   baij->nz = nz;
1313   for (i=0; i<nbs; i++) {
1314     baij->ilen[i] = baij->imax[i];
1315   }
1316 
1317   PetscFunctionReturn(0);
1318 }
1319 EXTERN_C_END
1320 
1321 #undef __FUNCT__
1322 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1323 /*@
1324     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1325        in the matrix.
1326 
1327   Input Parameters:
1328 +  mat - the SeqBAIJ matrix
1329 -  indices - the column indices
1330 
1331   Level: advanced
1332 
1333   Notes:
1334     This can be called if you have precomputed the nonzero structure of the
1335   matrix and want to provide it to the matrix object to improve the performance
1336   of the MatSetValues() operation.
1337 
1338     You MUST have set the correct numbers of nonzeros per row in the call to
1339   MatCreateSeqBAIJ().
1340 
1341     MUST be called before any calls to MatSetValues();
1342 
1343 @*/
1344 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1345 {
1346   int ierr,(*f)(Mat,int *);
1347 
1348   PetscFunctionBegin;
1349   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1350   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1351   if (f) {
1352     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1353   } else {
1354     SETERRQ(1,"Wrong type of matrix to set column indices");
1355   }
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1361 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1362 {
1363   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1364   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1365   PetscReal    atmp;
1366   PetscScalar  *x,zero = 0.0;
1367   MatScalar    *aa;
1368   int          ncols,brow,krow,kcol;
1369 
1370   PetscFunctionBegin;
1371   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1372   bs   = a->bs;
1373   aa   = a->a;
1374   ai   = a->i;
1375   aj   = a->j;
1376   mbs = a->mbs;
1377 
1378   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1379   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1380   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1381   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1382   for (i=0; i<mbs; i++) {
1383     ncols = ai[1] - ai[0]; ai++;
1384     brow  = bs*i;
1385     for (j=0; j<ncols; j++){
1386       /* bcol = bs*(*aj); */
1387       for (kcol=0; kcol<bs; kcol++){
1388         for (krow=0; krow<bs; krow++){
1389           atmp = PetscAbsScalar(*aa); aa++;
1390           row = brow + krow;    /* row index */
1391           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1392           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1393         }
1394       }
1395       aj++;
1396     }
1397   }
1398   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 #undef __FUNCT__
1403 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1404 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1405 {
1406   int        ierr;
1407 
1408   PetscFunctionBegin;
1409   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1415 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1416 {
1417   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1418   PetscFunctionBegin;
1419   *array = a->a;
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1425 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1426 {
1427   PetscFunctionBegin;
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 #include "petscblaslapack.h"
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1434 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1435 {
1436   Mat_SeqBAIJ  *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1437   int          ierr,one=1,i,bs=y->bs,j,bs2;
1438 
1439   PetscFunctionBegin;
1440   if (str == SAME_NONZERO_PATTERN) {
1441     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1442   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1443     if (y->xtoy && y->XtoY != X) {
1444       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1445       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1446     }
1447     if (!y->xtoy) { /* get xtoy */
1448       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1449       y->XtoY = X;
1450     }
1451     bs2 = bs*bs;
1452     for (i=0; i<x->nz; i++) {
1453       j = 0;
1454       while (j < bs2){
1455         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1456         j++;
1457       }
1458     }
1459     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1460   } else {
1461     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1462   }
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 /* -------------------------------------------------------------------*/
1467 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1468        MatGetRow_SeqBAIJ,
1469        MatRestoreRow_SeqBAIJ,
1470        MatMult_SeqBAIJ_N,
1471 /* 4*/ MatMultAdd_SeqBAIJ_N,
1472        MatMultTranspose_SeqBAIJ,
1473        MatMultTransposeAdd_SeqBAIJ,
1474        MatSolve_SeqBAIJ_N,
1475        0,
1476        0,
1477 /*10*/ 0,
1478        MatLUFactor_SeqBAIJ,
1479        0,
1480        0,
1481        MatTranspose_SeqBAIJ,
1482 /*15*/ MatGetInfo_SeqBAIJ,
1483        MatEqual_SeqBAIJ,
1484        MatGetDiagonal_SeqBAIJ,
1485        MatDiagonalScale_SeqBAIJ,
1486        MatNorm_SeqBAIJ,
1487 /*20*/ 0,
1488        MatAssemblyEnd_SeqBAIJ,
1489        0,
1490        MatSetOption_SeqBAIJ,
1491        MatZeroEntries_SeqBAIJ,
1492 /*25*/ MatZeroRows_SeqBAIJ,
1493        MatLUFactorSymbolic_SeqBAIJ,
1494        MatLUFactorNumeric_SeqBAIJ_N,
1495        0,
1496        0,
1497 /*30*/ MatSetUpPreallocation_SeqBAIJ,
1498        MatILUFactorSymbolic_SeqBAIJ,
1499        0,
1500        MatGetArray_SeqBAIJ,
1501        MatRestoreArray_SeqBAIJ,
1502 /*35*/ MatDuplicate_SeqBAIJ,
1503        0,
1504        0,
1505        MatILUFactor_SeqBAIJ,
1506        0,
1507 /*40*/ MatAXPY_SeqBAIJ,
1508        MatGetSubMatrices_SeqBAIJ,
1509        MatIncreaseOverlap_SeqBAIJ,
1510        MatGetValues_SeqBAIJ,
1511        0,
1512 /*45*/ MatPrintHelp_SeqBAIJ,
1513        MatScale_SeqBAIJ,
1514        0,
1515        0,
1516        0,
1517 /*50*/ MatGetBlockSize_SeqBAIJ,
1518        MatGetRowIJ_SeqBAIJ,
1519        MatRestoreRowIJ_SeqBAIJ,
1520        0,
1521        0,
1522 /*55*/ 0,
1523        0,
1524        0,
1525        0,
1526        MatSetValuesBlocked_SeqBAIJ,
1527 /*60*/ MatGetSubMatrix_SeqBAIJ,
1528        MatDestroy_SeqBAIJ,
1529        MatView_SeqBAIJ,
1530        MatGetPetscMaps_Petsc,
1531        0,
1532 /*65*/ 0,
1533        0,
1534        0,
1535        0,
1536        0,
1537 /*70*/ MatGetRowMax_SeqBAIJ,
1538        MatConvert_Basic,
1539        0,
1540        0,
1541        0,
1542 /*75*/ 0,
1543        0,
1544        0,
1545        0,
1546        0,
1547 /*80*/ 0,
1548        0,
1549        0,
1550        0,
1551        0,
1552 /*85*/ MatLoad_SeqBAIJ
1553 };
1554 
1555 EXTERN_C_BEGIN
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1558 int MatStoreValues_SeqBAIJ(Mat mat)
1559 {
1560   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1561   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1562   int         ierr;
1563 
1564   PetscFunctionBegin;
1565   if (aij->nonew != 1) {
1566     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1567   }
1568 
1569   /* allocate space for values if not already there */
1570   if (!aij->saved_values) {
1571     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1572   }
1573 
1574   /* copy values over */
1575   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1576   PetscFunctionReturn(0);
1577 }
1578 EXTERN_C_END
1579 
1580 EXTERN_C_BEGIN
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1583 int MatRetrieveValues_SeqBAIJ(Mat mat)
1584 {
1585   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1586   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1587 
1588   PetscFunctionBegin;
1589   if (aij->nonew != 1) {
1590     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1591   }
1592   if (!aij->saved_values) {
1593     SETERRQ(1,"Must call MatStoreValues(A);first");
1594   }
1595 
1596   /* copy values over */
1597   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1598   PetscFunctionReturn(0);
1599 }
1600 EXTERN_C_END
1601 
1602 EXTERN_C_BEGIN
1603 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1604 EXTERN_C_END
1605 
1606 EXTERN_C_BEGIN
1607 #undef __FUNCT__
1608 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
1609 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz)
1610 {
1611   Mat_SeqBAIJ *b;
1612   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1613   PetscTruth  flg;
1614 
1615   PetscFunctionBegin;
1616 
1617   B->preallocated = PETSC_TRUE;
1618   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1619   if (nnz && newbs != bs) {
1620     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1621   }
1622   bs   = newbs;
1623 
1624   mbs  = B->m/bs;
1625   nbs  = B->n/bs;
1626   bs2  = bs*bs;
1627 
1628   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1629     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1630   }
1631 
1632   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1633   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1634   if (nnz) {
1635     for (i=0; i<mbs; i++) {
1636       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1637       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1638     }
1639   }
1640 
1641   b       = (Mat_SeqBAIJ*)B->data;
1642   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1643   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1644   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1645   if (!flg) {
1646     switch (bs) {
1647     case 1:
1648       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1649       B->ops->mult            = MatMult_SeqBAIJ_1;
1650       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1651       break;
1652     case 2:
1653       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1654       B->ops->mult            = MatMult_SeqBAIJ_2;
1655       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1656       break;
1657     case 3:
1658       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1659       B->ops->mult            = MatMult_SeqBAIJ_3;
1660       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1661       break;
1662     case 4:
1663       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1664       B->ops->mult            = MatMult_SeqBAIJ_4;
1665       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1666       break;
1667     case 5:
1668       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1669       B->ops->mult            = MatMult_SeqBAIJ_5;
1670       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1671       break;
1672     case 6:
1673       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1674       B->ops->mult            = MatMult_SeqBAIJ_6;
1675       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1676       break;
1677     case 7:
1678       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1679       B->ops->mult            = MatMult_SeqBAIJ_7;
1680       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1681       break;
1682     default:
1683       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1684       B->ops->mult            = MatMult_SeqBAIJ_N;
1685       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1686       break;
1687     }
1688   }
1689   b->bs      = bs;
1690   b->mbs     = mbs;
1691   b->nbs     = nbs;
1692   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1693   if (!nnz) {
1694     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1695     else if (nz <= 0)        nz = 1;
1696     for (i=0; i<mbs; i++) b->imax[i] = nz;
1697     nz = nz*mbs;
1698   } else {
1699     nz = 0;
1700     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1701   }
1702 
1703   /* allocate the matrix space */
1704   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1705   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1706   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1707   b->j  = (int*)(b->a + nz*bs2);
1708   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1709   b->i  = b->j + nz;
1710   b->singlemalloc = PETSC_TRUE;
1711 
1712   b->i[0] = 0;
1713   for (i=1; i<mbs+1; i++) {
1714     b->i[i] = b->i[i-1] + b->imax[i-1];
1715   }
1716 
1717   /* b->ilen will count nonzeros in each block row so far. */
1718   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1719   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1720   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1721 
1722   b->bs               = bs;
1723   b->bs2              = bs2;
1724   b->mbs              = mbs;
1725   b->nz               = 0;
1726   b->maxnz            = nz*bs2;
1727   B->info.nz_unneeded = (PetscReal)b->maxnz;
1728   PetscFunctionReturn(0);
1729 }
1730 EXTERN_C_END
1731 
1732 /*MC
1733    MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
1734    block sparse compressed row format.
1735 
1736    Options Database Keys:
1737 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
1738 
1739   Level: beginner
1740 
1741 .seealso: MatCreateSeqBAIJ
1742 M*/
1743 
1744 EXTERN_C_BEGIN
1745 #undef __FUNCT__
1746 #define __FUNCT__ "MatCreate_SeqBAIJ"
1747 int MatCreate_SeqBAIJ(Mat B)
1748 {
1749   int         ierr,size;
1750   Mat_SeqBAIJ *b;
1751 
1752   PetscFunctionBegin;
1753   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1754   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1755 
1756   B->m = B->M = PetscMax(B->m,B->M);
1757   B->n = B->N = PetscMax(B->n,B->N);
1758   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1759   B->data = (void*)b;
1760   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1761   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1762   B->factor           = 0;
1763   B->lupivotthreshold = 1.0;
1764   B->mapping          = 0;
1765   b->row              = 0;
1766   b->col              = 0;
1767   b->icol             = 0;
1768   b->reallocs         = 0;
1769   b->saved_values     = 0;
1770 #if defined(PETSC_USE_MAT_SINGLE)
1771   b->setvalueslen     = 0;
1772   b->setvaluescopy    = PETSC_NULL;
1773 #endif
1774 
1775   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1776   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1777 
1778   b->sorted           = PETSC_FALSE;
1779   b->roworiented      = PETSC_TRUE;
1780   b->nonew            = 0;
1781   b->diag             = 0;
1782   b->solve_work       = 0;
1783   b->mult_work        = 0;
1784   B->spptr            = 0;
1785   B->info.nz_unneeded = (PetscReal)b->maxnz;
1786   b->keepzeroedrows   = PETSC_FALSE;
1787   b->xtoy              = 0;
1788   b->XtoY              = 0;
1789 
1790   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1791                                      "MatStoreValues_SeqBAIJ",
1792                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1793   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1794                                      "MatRetrieveValues_SeqBAIJ",
1795                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1796   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1797                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1798                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1799   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
1800                                      "MatConvert_SeqBAIJ_SeqAIJ",
1801                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1802   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
1803                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
1804                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
1805   PetscFunctionReturn(0);
1806 }
1807 EXTERN_C_END
1808 
1809 #undef __FUNCT__
1810 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1811 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1812 {
1813   Mat         C;
1814   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1815   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1816 
1817   PetscFunctionBegin;
1818   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1819 
1820   *B = 0;
1821   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1822   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1823   c    = (Mat_SeqBAIJ*)C->data;
1824 
1825   c->bs         = a->bs;
1826   c->bs2        = a->bs2;
1827   c->mbs        = a->mbs;
1828   c->nbs        = a->nbs;
1829   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1830 
1831   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1832   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1833   for (i=0; i<mbs; i++) {
1834     c->imax[i] = a->imax[i];
1835     c->ilen[i] = a->ilen[i];
1836   }
1837 
1838   /* allocate the matrix space */
1839   c->singlemalloc = PETSC_TRUE;
1840   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1841   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1842   c->j = (int*)(c->a + nz*bs2);
1843   c->i = c->j + nz;
1844   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1845   if (mbs > 0) {
1846     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1847     if (cpvalues == MAT_COPY_VALUES) {
1848       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1849     } else {
1850       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1851     }
1852   }
1853 
1854   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1855   c->sorted      = a->sorted;
1856   c->roworiented = a->roworiented;
1857   c->nonew       = a->nonew;
1858 
1859   if (a->diag) {
1860     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1861     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1862     for (i=0; i<mbs; i++) {
1863       c->diag[i] = a->diag[i];
1864     }
1865   } else c->diag        = 0;
1866   c->nz                 = a->nz;
1867   c->maxnz              = a->maxnz;
1868   c->solve_work         = 0;
1869   C->spptr              = 0;     /* Dangerous -I'm throwing away a->spptr */
1870   c->mult_work          = 0;
1871   C->preallocated       = PETSC_TRUE;
1872   C->assembled          = PETSC_TRUE;
1873   *B = C;
1874   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 #undef __FUNCT__
1879 #define __FUNCT__ "MatLoad_SeqBAIJ"
1880 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1881 {
1882   Mat_SeqBAIJ  *a;
1883   Mat          B;
1884   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1885   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1886   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1887   int          *masked,nmask,tmp,bs2,ishift;
1888   PetscScalar  *aa;
1889   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1890 
1891   PetscFunctionBegin;
1892   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1893   bs2  = bs*bs;
1894 
1895   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1896   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1897   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1898   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1899   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1900   M = header[1]; N = header[2]; nz = header[3];
1901 
1902   if (header[3] < 0) {
1903     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1904   }
1905 
1906   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1907 
1908   /*
1909      This code adds extra rows to make sure the number of rows is
1910     divisible by the blocksize
1911   */
1912   mbs        = M/bs;
1913   extra_rows = bs - M + bs*(mbs);
1914   if (extra_rows == bs) extra_rows = 0;
1915   else                  mbs++;
1916   if (extra_rows) {
1917     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1918   }
1919 
1920   /* read in row lengths */
1921   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1922   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1923   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1924 
1925   /* read in column indices */
1926   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1927   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1928   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1929 
1930   /* loop over row lengths determining block row lengths */
1931   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1932   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1933   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1934   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1935   masked   = mask + mbs;
1936   rowcount = 0; nzcount = 0;
1937   for (i=0; i<mbs; i++) {
1938     nmask = 0;
1939     for (j=0; j<bs; j++) {
1940       kmax = rowlengths[rowcount];
1941       for (k=0; k<kmax; k++) {
1942         tmp = jj[nzcount++]/bs;
1943         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1944       }
1945       rowcount++;
1946     }
1947     browlengths[i] += nmask;
1948     /* zero out the mask elements we set */
1949     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1950   }
1951 
1952   /* create our matrix */
1953   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
1954   ierr = MatSetType(B,type);CHKERRQ(ierr);
1955   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
1956   a = (Mat_SeqBAIJ*)B->data;
1957 
1958   /* set matrix "i" values */
1959   a->i[0] = 0;
1960   for (i=1; i<= mbs; i++) {
1961     a->i[i]      = a->i[i-1] + browlengths[i-1];
1962     a->ilen[i-1] = browlengths[i-1];
1963   }
1964   a->nz         = 0;
1965   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1966 
1967   /* read in nonzero values */
1968   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1969   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1970   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1971 
1972   /* set "a" and "j" values into matrix */
1973   nzcount = 0; jcount = 0;
1974   for (i=0; i<mbs; i++) {
1975     nzcountb = nzcount;
1976     nmask    = 0;
1977     for (j=0; j<bs; j++) {
1978       kmax = rowlengths[i*bs+j];
1979       for (k=0; k<kmax; k++) {
1980         tmp = jj[nzcount++]/bs;
1981 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1982       }
1983     }
1984     /* sort the masked values */
1985     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1986 
1987     /* set "j" values into matrix */
1988     maskcount = 1;
1989     for (j=0; j<nmask; j++) {
1990       a->j[jcount++]  = masked[j];
1991       mask[masked[j]] = maskcount++;
1992     }
1993     /* set "a" values into matrix */
1994     ishift = bs2*a->i[i];
1995     for (j=0; j<bs; j++) {
1996       kmax = rowlengths[i*bs+j];
1997       for (k=0; k<kmax; k++) {
1998         tmp       = jj[nzcountb]/bs ;
1999         block     = mask[tmp] - 1;
2000         point     = jj[nzcountb] - bs*tmp;
2001         idx       = ishift + bs2*block + j + bs*point;
2002         a->a[idx] = (MatScalar)aa[nzcountb++];
2003       }
2004     }
2005     /* zero out the mask elements we set */
2006     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2007   }
2008   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2009 
2010   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2011   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2012   ierr = PetscFree(aa);CHKERRQ(ierr);
2013   ierr = PetscFree(jj);CHKERRQ(ierr);
2014   ierr = PetscFree(mask);CHKERRQ(ierr);
2015 
2016   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2017   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2018   ierr = MatView_Private(B);CHKERRQ(ierr);
2019 
2020   *A = B;
2021   PetscFunctionReturn(0);
2022 }
2023 
2024 #undef __FUNCT__
2025 #define __FUNCT__ "MatCreateSeqBAIJ"
2026 /*@C
2027    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2028    compressed row) format.  For good matrix assembly performance the
2029    user should preallocate the matrix storage by setting the parameter nz
2030    (or the array nnz).  By setting these parameters accurately, performance
2031    during matrix assembly can be increased by more than a factor of 50.
2032 
2033    Collective on MPI_Comm
2034 
2035    Input Parameters:
2036 +  comm - MPI communicator, set to PETSC_COMM_SELF
2037 .  bs - size of block
2038 .  m - number of rows
2039 .  n - number of columns
2040 .  nz - number of nonzero blocks  per block row (same for all rows)
2041 -  nnz - array containing the number of nonzero blocks in the various block rows
2042          (possibly different for each block row) or PETSC_NULL
2043 
2044    Output Parameter:
2045 .  A - the matrix
2046 
2047    Options Database Keys:
2048 .   -mat_no_unroll - uses code that does not unroll the loops in the
2049                      block calculations (much slower)
2050 .    -mat_block_size - size of the blocks to use
2051 
2052    Level: intermediate
2053 
2054    Notes:
2055    A nonzero block is any block that as 1 or more nonzeros in it
2056 
2057    The block AIJ format is fully compatible with standard Fortran 77
2058    storage.  That is, the stored row and column indices can begin at
2059    either one (as in Fortran) or zero.  See the users' manual for details.
2060 
2061    Specify the preallocated storage with either nz or nnz (not both).
2062    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2063    allocation.  For additional details, see the users manual chapter on
2064    matrices.
2065 
2066 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2067 @*/
2068 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
2069 {
2070   int ierr;
2071 
2072   PetscFunctionBegin;
2073   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2074   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2075   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 #undef __FUNCT__
2080 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2081 /*@C
2082    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2083    per row in the matrix. For good matrix assembly performance the
2084    user should preallocate the matrix storage by setting the parameter nz
2085    (or the array nnz).  By setting these parameters accurately, performance
2086    during matrix assembly can be increased by more than a factor of 50.
2087 
2088    Collective on MPI_Comm
2089 
2090    Input Parameters:
2091 +  A - the matrix
2092 .  bs - size of block
2093 .  nz - number of block nonzeros per block row (same for all rows)
2094 -  nnz - array containing the number of block nonzeros in the various block rows
2095          (possibly different for each block row) or PETSC_NULL
2096 
2097    Options Database Keys:
2098 .   -mat_no_unroll - uses code that does not unroll the loops in the
2099                      block calculations (much slower)
2100 .    -mat_block_size - size of the blocks to use
2101 
2102    Level: intermediate
2103 
2104    Notes:
2105    The block AIJ format is fully compatible with standard Fortran 77
2106    storage.  That is, the stored row and column indices can begin at
2107    either one (as in Fortran) or zero.  See the users' manual for details.
2108 
2109    Specify the preallocated storage with either nz or nnz (not both).
2110    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2111    allocation.  For additional details, see the users manual chapter on
2112    matrices.
2113 
2114 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2115 @*/
2116 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[])
2117 {
2118   int ierr,(*f)(Mat,int,int,const int[]);
2119 
2120   PetscFunctionBegin;
2121   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2122   if (f) {
2123     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2124   }
2125   PetscFunctionReturn(0);
2126 }
2127