xref: /petsc/src/mat/impls/baij/seq/baij.c (revision d34347eca8ac9657cccc548b3ba1c8201ddea9cf)
1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "petscsys.h"                     /*I "petscmat.h" I*/
11 
12 /*
13     Special version for Fun3d sequential benchmark
14 */
15 #if defined(PETSC_HAVE_FORTRAN_CAPS)
16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
18 #define matsetvaluesblocked4_ matsetvaluesblocked4
19 #endif
20 
21 EXTERN_C_BEGIN
22 #undef __FUNCT__
23 #define __FUNCT__ "matsetvaluesblocked4_"
24 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[])
25 {
26   Mat               A = *AA;
27   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
28   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
29   int               *ai=a->i,*ailen=a->ilen;
30   int               *aj=a->j,stepval;
31   const PetscScalar *value = v;
32   MatScalar         *ap,*aa = a->a,*bap;
33 
34   PetscFunctionBegin;
35   stepval = (n-1)*4;
36   for (k=0; k<m; k++) { /* loop over added rows */
37     row  = im[k];
38     rp   = aj + ai[row];
39     ap   = aa + 16*ai[row];
40     nrow = ailen[row];
41     low  = 0;
42     for (l=0; l<n; l++) { /* loop over added columns */
43       col = in[l];
44       value = v + k*(stepval+4)*4 + l*4;
45       low = 0; high = nrow;
46       while (high-low > 7) {
47         t = (low+high)/2;
48         if (rp[t] > col) high = t;
49         else             low  = t;
50       }
51       for (i=low; i<high; i++) {
52         if (rp[i] > col) break;
53         if (rp[i] == col) {
54           bap  = ap +  16*i;
55           for (ii=0; ii<4; ii++,value+=stepval) {
56             for (jj=ii; jj<16; jj+=4) {
57               bap[jj] += *value++;
58             }
59           }
60           goto noinsert2;
61         }
62       }
63       N = nrow++ - 1;
64       /* shift up all the later entries in this row */
65       for (ii=N; ii>=i; ii--) {
66         rp[ii+1] = rp[ii];
67         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
68       }
69       if (N >= i) {
70         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
71       }
72       rp[i] = col;
73       bap   = ap +  16*i;
74       for (ii=0; ii<4; ii++,value+=stepval) {
75         for (jj=ii; jj<16; jj+=4) {
76           bap[jj] = *value++;
77         }
78       }
79       noinsert2:;
80       low = i;
81     }
82     ailen[row] = nrow;
83   }
84 }
85 EXTERN_C_END
86 
87 #if defined(PETSC_HAVE_FORTRAN_CAPS)
88 #define matsetvalues4_ MATSETVALUES4
89 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
90 #define matsetvalues4_ matsetvalues4
91 #endif
92 
93 EXTERN_C_BEGIN
94 #undef __FUNCT__
95 #define __FUNCT__ "MatSetValues4_"
96 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
97 {
98   Mat         A = *AA;
99   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
100   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
101   int         *ai=a->i,*ailen=a->ilen;
102   int         *aj=a->j,brow,bcol;
103   int         ridx,cidx;
104   MatScalar   *ap,value,*aa=a->a,*bap;
105 
106   PetscFunctionBegin;
107   for (k=0; k<m; k++) { /* loop over added rows */
108     row  = im[k]; brow = row/4;
109     rp   = aj + ai[brow];
110     ap   = aa + 16*ai[brow];
111     nrow = ailen[brow];
112     low  = 0;
113     for (l=0; l<n; l++) { /* loop over added columns */
114       col = in[l]; bcol = col/4;
115       ridx = row % 4; cidx = col % 4;
116       value = v[l + k*n];
117       low = 0; high = nrow;
118       while (high-low > 7) {
119         t = (low+high)/2;
120         if (rp[t] > bcol) high = t;
121         else              low  = t;
122       }
123       for (i=low; i<high; i++) {
124         if (rp[i] > bcol) break;
125         if (rp[i] == bcol) {
126           bap  = ap +  16*i + 4*cidx + ridx;
127           *bap += value;
128           goto noinsert1;
129         }
130       }
131       N = nrow++ - 1;
132       /* shift up all the later entries in this row */
133       for (ii=N; ii>=i; ii--) {
134         rp[ii+1] = rp[ii];
135         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
136       }
137       if (N>=i) {
138         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
139       }
140       rp[i]                    = bcol;
141       ap[16*i + 4*cidx + ridx] = value;
142       noinsert1:;
143       low = i;
144     }
145     ailen[brow] = nrow;
146   }
147 }
148 EXTERN_C_END
149 
150 /*  UGLY, ugly, ugly
151    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
152    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
153    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
154    converts the entries into single precision and then calls ..._MatScalar() to put them
155    into the single precision data structures.
156 */
157 #if defined(PETSC_USE_MAT_SINGLE)
158 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
159 #else
160 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
161 #endif
162 #if defined(PETSC_HAVE_DSCPACK)
163 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);
164 #endif
165 
166 #define CHUNKSIZE  10
167 
168 /*
169      Checks for missing diagonals
170 */
171 #undef __FUNCT__
172 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
173 int MatMissingDiagonal_SeqBAIJ(Mat A)
174 {
175   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
176   int         *diag,*jj = a->j,i,ierr;
177 
178   PetscFunctionBegin;
179   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
180   diag = a->diag;
181   for (i=0; i<a->mbs; i++) {
182     if (jj[diag[i]] != i) {
183       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
184     }
185   }
186   PetscFunctionReturn(0);
187 }
188 
189 #undef __FUNCT__
190 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
191 int MatMarkDiagonal_SeqBAIJ(Mat A)
192 {
193   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
194   int         i,j,*diag,m = a->mbs,ierr;
195 
196   PetscFunctionBegin;
197   if (a->diag) PetscFunctionReturn(0);
198 
199   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
200   PetscLogObjectMemory(A,(m+1)*sizeof(int));
201   for (i=0; i<m; i++) {
202     diag[i] = a->i[i+1];
203     for (j=a->i[i]; j<a->i[i+1]; j++) {
204       if (a->j[j] == i) {
205         diag[i] = j;
206         break;
207       }
208     }
209   }
210   a->diag = diag;
211   PetscFunctionReturn(0);
212 }
213 
214 
215 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
216 
217 #undef __FUNCT__
218 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
219 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
220 {
221   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
222   int         ierr,n = a->mbs,i;
223 
224   PetscFunctionBegin;
225   *nn = n;
226   if (!ia) PetscFunctionReturn(0);
227   if (symmetric) {
228     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
229   } else if (oshift == 1) {
230     /* temporarily add 1 to i and j indices */
231     int nz = a->i[n];
232     for (i=0; i<nz; i++) a->j[i]++;
233     for (i=0; i<n+1; i++) a->i[i]++;
234     *ia = a->i; *ja = a->j;
235   } else {
236     *ia = a->i; *ja = a->j;
237   }
238 
239   PetscFunctionReturn(0);
240 }
241 
242 #undef __FUNCT__
243 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
244 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
245 {
246   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
247   int         i,n = a->mbs,ierr;
248 
249   PetscFunctionBegin;
250   if (!ia) PetscFunctionReturn(0);
251   if (symmetric) {
252     ierr = PetscFree(*ia);CHKERRQ(ierr);
253     ierr = PetscFree(*ja);CHKERRQ(ierr);
254   } else if (oshift == 1) {
255     int nz = a->i[n]-1;
256     for (i=0; i<nz; i++) a->j[i]--;
257     for (i=0; i<n+1; i++) a->i[i]--;
258   }
259   PetscFunctionReturn(0);
260 }
261 
262 #undef __FUNCT__
263 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
264 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
265 {
266   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
267 
268   PetscFunctionBegin;
269   *bs = baij->bs;
270   PetscFunctionReturn(0);
271 }
272 
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatDestroy_SeqBAIJ"
276 int MatDestroy_SeqBAIJ(Mat A)
277 {
278   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
279   int         ierr;
280 
281   PetscFunctionBegin;
282 #if defined(PETSC_USE_LOG)
283   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
284 #endif
285   ierr = PetscFree(a->a);CHKERRQ(ierr);
286   if (!a->singlemalloc) {
287     ierr = PetscFree(a->i);CHKERRQ(ierr);
288     ierr = PetscFree(a->j);CHKERRQ(ierr);
289   }
290   if (a->row) {
291     ierr = ISDestroy(a->row);CHKERRQ(ierr);
292   }
293   if (a->col) {
294     ierr = ISDestroy(a->col);CHKERRQ(ierr);
295   }
296   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
297   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
298   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
299   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
300   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
301   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
302   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
303 #if defined(PETSC_USE_MAT_SINGLE)
304   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
305 #endif
306   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
307 
308   ierr = PetscFree(a);CHKERRQ(ierr);
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "MatSetOption_SeqBAIJ"
314 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
315 {
316   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
317 
318   PetscFunctionBegin;
319   switch (op) {
320   case MAT_ROW_ORIENTED:
321     a->roworiented    = PETSC_TRUE;
322     break;
323   case MAT_COLUMN_ORIENTED:
324     a->roworiented    = PETSC_FALSE;
325     break;
326   case MAT_COLUMNS_SORTED:
327     a->sorted         = PETSC_TRUE;
328     break;
329   case MAT_COLUMNS_UNSORTED:
330     a->sorted         = PETSC_FALSE;
331     break;
332   case MAT_KEEP_ZEROED_ROWS:
333     a->keepzeroedrows = PETSC_TRUE;
334     break;
335   case MAT_NO_NEW_NONZERO_LOCATIONS:
336     a->nonew          = 1;
337     break;
338   case MAT_NEW_NONZERO_LOCATION_ERR:
339     a->nonew          = -1;
340     break;
341   case MAT_NEW_NONZERO_ALLOCATION_ERR:
342     a->nonew          = -2;
343     break;
344   case MAT_YES_NEW_NONZERO_LOCATIONS:
345     a->nonew          = 0;
346     break;
347   case MAT_ROWS_SORTED:
348   case MAT_ROWS_UNSORTED:
349   case MAT_YES_NEW_DIAGONALS:
350   case MAT_IGNORE_OFF_PROC_ENTRIES:
351   case MAT_USE_HASH_TABLE:
352     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
353     break;
354   case MAT_NO_NEW_DIAGONALS:
355     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
356   default:
357     SETERRQ(PETSC_ERR_SUP,"unknown option");
358   }
359   PetscFunctionReturn(0);
360 }
361 
362 #undef __FUNCT__
363 #define __FUNCT__ "MatGetRow_SeqBAIJ"
364 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
365 {
366   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
367   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
368   MatScalar    *aa,*aa_i;
369   PetscScalar  *v_i;
370 
371   PetscFunctionBegin;
372   bs  = a->bs;
373   ai  = a->i;
374   aj  = a->j;
375   aa  = a->a;
376   bs2 = a->bs2;
377 
378   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
379 
380   bn  = row/bs;   /* Block number */
381   bp  = row % bs; /* Block Position */
382   M   = ai[bn+1] - ai[bn];
383   *nz = bs*M;
384 
385   if (v) {
386     *v = 0;
387     if (*nz) {
388       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
389       for (i=0; i<M; i++) { /* for each block in the block row */
390         v_i  = *v + i*bs;
391         aa_i = aa + bs2*(ai[bn] + i);
392         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
393       }
394     }
395   }
396 
397   if (idx) {
398     *idx = 0;
399     if (*nz) {
400       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
401       for (i=0; i<M; i++) { /* for each block in the block row */
402         idx_i = *idx + i*bs;
403         itmp  = bs*aj[ai[bn] + i];
404         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
405       }
406     }
407   }
408   PetscFunctionReturn(0);
409 }
410 
411 #undef __FUNCT__
412 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
413 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
414 {
415   int ierr;
416 
417   PetscFunctionBegin;
418   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
419   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
420   PetscFunctionReturn(0);
421 }
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "MatTranspose_SeqBAIJ"
425 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
426 {
427   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
428   Mat         C;
429   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
430   int         *rows,*cols,bs2=a->bs2;
431   PetscScalar *array;
432 
433   PetscFunctionBegin;
434   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
435   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
436   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
437 
438 #if defined(PETSC_USE_MAT_SINGLE)
439   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
440   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
441 #else
442   array = a->a;
443 #endif
444 
445   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
446   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
447   ierr = PetscFree(col);CHKERRQ(ierr);
448   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
449   cols = rows + bs;
450   for (i=0; i<mbs; i++) {
451     cols[0] = i*bs;
452     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
453     len = ai[i+1] - ai[i];
454     for (j=0; j<len; j++) {
455       rows[0] = (*aj++)*bs;
456       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
457       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
458       array += bs2;
459     }
460   }
461   ierr = PetscFree(rows);CHKERRQ(ierr);
462 #if defined(PETSC_USE_MAT_SINGLE)
463   ierr = PetscFree(array);
464 #endif
465 
466   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
467   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
468 
469   if (B) {
470     *B = C;
471   } else {
472     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
473   }
474   PetscFunctionReturn(0);
475 }
476 
477 #undef __FUNCT__
478 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
479 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
480 {
481   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
482   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
483   PetscScalar *aa;
484   FILE        *file;
485 
486   PetscFunctionBegin;
487   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
488   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
489   col_lens[0] = MAT_FILE_COOKIE;
490 
491   col_lens[1] = A->m;
492   col_lens[2] = A->n;
493   col_lens[3] = a->nz*bs2;
494 
495   /* store lengths of each row and write (including header) to file */
496   count = 0;
497   for (i=0; i<a->mbs; i++) {
498     for (j=0; j<bs; j++) {
499       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
500     }
501   }
502   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
503   ierr = PetscFree(col_lens);CHKERRQ(ierr);
504 
505   /* store column indices (zero start index) */
506   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
507   count = 0;
508   for (i=0; i<a->mbs; i++) {
509     for (j=0; j<bs; j++) {
510       for (k=a->i[i]; k<a->i[i+1]; k++) {
511         for (l=0; l<bs; l++) {
512           jj[count++] = bs*a->j[k] + l;
513         }
514       }
515     }
516   }
517   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
518   ierr = PetscFree(jj);CHKERRQ(ierr);
519 
520   /* store nonzero values */
521   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
522   count = 0;
523   for (i=0; i<a->mbs; i++) {
524     for (j=0; j<bs; j++) {
525       for (k=a->i[i]; k<a->i[i+1]; k++) {
526         for (l=0; l<bs; l++) {
527           aa[count++] = a->a[bs2*k + l*bs + j];
528         }
529       }
530     }
531   }
532   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
533   ierr = PetscFree(aa);CHKERRQ(ierr);
534 
535   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
536   if (file) {
537     fprintf(file,"-matload_block_size %d\n",a->bs);
538   }
539   PetscFunctionReturn(0);
540 }
541 
542 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer);
543 
544 #undef __FUNCT__
545 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
546 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
547 {
548   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
549   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
550   PetscViewerFormat format;
551 
552   PetscFunctionBegin;
553   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
554   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
555     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
556   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
557     Mat aij;
558     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
559     ierr = MatView(aij,viewer);CHKERRQ(ierr);
560     ierr = MatDestroy(aij);CHKERRQ(ierr);
561   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
562 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
563      ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr);
564 #endif
565      PetscFunctionReturn(0);
566   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
567     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
568     for (i=0; i<a->mbs; i++) {
569       for (j=0; j<bs; j++) {
570         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
571         for (k=a->i[i]; k<a->i[i+1]; k++) {
572           for (l=0; l<bs; l++) {
573 #if defined(PETSC_USE_COMPLEX)
574             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
575               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
576                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
577             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
578               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
579                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
580             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
581               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
582             }
583 #else
584             if (a->a[bs2*k + l*bs + j] != 0.0) {
585               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
586             }
587 #endif
588           }
589         }
590         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
591       }
592     }
593     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
594   } else {
595     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
596     for (i=0; i<a->mbs; i++) {
597       for (j=0; j<bs; j++) {
598         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
599         for (k=a->i[i]; k<a->i[i+1]; k++) {
600           for (l=0; l<bs; l++) {
601 #if defined(PETSC_USE_COMPLEX)
602             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
603               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
604                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
605             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
606               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
607                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
608             } else {
609               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
610             }
611 #else
612             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
613 #endif
614           }
615         }
616         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
617       }
618     }
619     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
620   }
621   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
622   PetscFunctionReturn(0);
623 }
624 
625 #undef __FUNCT__
626 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
627 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
628 {
629   Mat          A = (Mat) Aa;
630   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
631   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
632   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
633   MatScalar    *aa;
634   PetscViewer  viewer;
635 
636   PetscFunctionBegin;
637 
638   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
639   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
640 
641   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
642 
643   /* loop over matrix elements drawing boxes */
644   color = PETSC_DRAW_BLUE;
645   for (i=0,row=0; i<mbs; i++,row+=bs) {
646     for (j=a->i[i]; j<a->i[i+1]; j++) {
647       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
648       x_l = a->j[j]*bs; x_r = x_l + 1.0;
649       aa = a->a + j*bs2;
650       for (k=0; k<bs; k++) {
651         for (l=0; l<bs; l++) {
652           if (PetscRealPart(*aa++) >=  0.) continue;
653           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
654         }
655       }
656     }
657   }
658   color = PETSC_DRAW_CYAN;
659   for (i=0,row=0; i<mbs; i++,row+=bs) {
660     for (j=a->i[i]; j<a->i[i+1]; j++) {
661       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
662       x_l = a->j[j]*bs; x_r = x_l + 1.0;
663       aa = a->a + j*bs2;
664       for (k=0; k<bs; k++) {
665         for (l=0; l<bs; l++) {
666           if (PetscRealPart(*aa++) != 0.) continue;
667           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
668         }
669       }
670     }
671   }
672 
673   color = PETSC_DRAW_RED;
674   for (i=0,row=0; i<mbs; i++,row+=bs) {
675     for (j=a->i[i]; j<a->i[i+1]; j++) {
676       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
677       x_l = a->j[j]*bs; x_r = x_l + 1.0;
678       aa = a->a + j*bs2;
679       for (k=0; k<bs; k++) {
680         for (l=0; l<bs; l++) {
681           if (PetscRealPart(*aa++) <= 0.) continue;
682           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
683         }
684       }
685     }
686   }
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
692 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
693 {
694   int          ierr;
695   PetscReal    xl,yl,xr,yr,w,h;
696   PetscDraw    draw;
697   PetscTruth   isnull;
698 
699   PetscFunctionBegin;
700 
701   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
702   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
703 
704   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
705   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
706   xr += w;    yr += h;  xl = -w;     yl = -h;
707   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
708   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
709   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
710   PetscFunctionReturn(0);
711 }
712 
713 #undef __FUNCT__
714 #define __FUNCT__ "MatView_SeqBAIJ"
715 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
716 {
717   int        ierr;
718   PetscTruth issocket,isascii,isbinary,isdraw;
719 
720   PetscFunctionBegin;
721   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
722   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
723   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
724   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
725   if (issocket) {
726     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
727   } else if (isascii){
728     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
729   } else if (isbinary) {
730     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
731   } else if (isdraw) {
732     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
733   } else {
734     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
735   }
736   PetscFunctionReturn(0);
737 }
738 
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatGetValues_SeqBAIJ"
742 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
743 {
744   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
745   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
746   int        *ai = a->i,*ailen = a->ilen;
747   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
748   MatScalar  *ap,*aa = a->a,zero = 0.0;
749 
750   PetscFunctionBegin;
751   for (k=0; k<m; k++) { /* loop over rows */
752     row  = im[k]; brow = row/bs;
753     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
754     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
755     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
756     nrow = ailen[brow];
757     for (l=0; l<n; l++) { /* loop over columns */
758       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
759       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
760       col  = in[l] ;
761       bcol = col/bs;
762       cidx = col%bs;
763       ridx = row%bs;
764       high = nrow;
765       low  = 0; /* assume unsorted */
766       while (high-low > 5) {
767         t = (low+high)/2;
768         if (rp[t] > bcol) high = t;
769         else             low  = t;
770       }
771       for (i=low; i<high; i++) {
772         if (rp[i] > bcol) break;
773         if (rp[i] == bcol) {
774           *v++ = ap[bs2*i+bs*cidx+ridx];
775           goto finished;
776         }
777       }
778       *v++ = zero;
779       finished:;
780     }
781   }
782   PetscFunctionReturn(0);
783 }
784 
785 #if defined(PETSC_USE_MAT_SINGLE)
786 #undef __FUNCT__
787 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
788 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
789 {
790   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
791   int         ierr,i,N = m*n*b->bs2;
792   MatScalar   *vsingle;
793 
794   PetscFunctionBegin;
795   if (N > b->setvalueslen) {
796     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
797     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
798     b->setvalueslen  = N;
799   }
800   vsingle = b->setvaluescopy;
801   for (i=0; i<N; i++) {
802     vsingle[i] = v[i];
803   }
804   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
805   PetscFunctionReturn(0);
806 }
807 #endif
808 
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
812 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is)
813 {
814   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
815   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
816   int               *imax=a->imax,*ai=a->i,*ailen=a->ilen;
817   int               *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
818   PetscTruth        roworiented=a->roworiented;
819   const MatScalar   *value = v;
820   MatScalar         *ap,*aa = a->a,*bap;
821 
822   PetscFunctionBegin;
823   if (roworiented) {
824     stepval = (n-1)*bs;
825   } else {
826     stepval = (m-1)*bs;
827   }
828   for (k=0; k<m; k++) { /* loop over added rows */
829     row  = im[k];
830     if (row < 0) continue;
831 #if defined(PETSC_USE_BOPT_g)
832     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
833 #endif
834     rp   = aj + ai[row];
835     ap   = aa + bs2*ai[row];
836     rmax = imax[row];
837     nrow = ailen[row];
838     low  = 0;
839     for (l=0; l<n; l++) { /* loop over added columns */
840       if (in[l] < 0) continue;
841 #if defined(PETSC_USE_BOPT_g)
842       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1);
843 #endif
844       col = in[l];
845       if (roworiented) {
846         value = v + k*(stepval+bs)*bs + l*bs;
847       } else {
848         value = v + l*(stepval+bs)*bs + k*bs;
849       }
850       if (!sorted) low = 0; high = nrow;
851       while (high-low > 7) {
852         t = (low+high)/2;
853         if (rp[t] > col) high = t;
854         else             low  = t;
855       }
856       for (i=low; i<high; i++) {
857         if (rp[i] > col) break;
858         if (rp[i] == col) {
859           bap  = ap +  bs2*i;
860           if (roworiented) {
861             if (is == ADD_VALUES) {
862               for (ii=0; ii<bs; ii++,value+=stepval) {
863                 for (jj=ii; jj<bs2; jj+=bs) {
864                   bap[jj] += *value++;
865                 }
866               }
867             } else {
868               for (ii=0; ii<bs; ii++,value+=stepval) {
869                 for (jj=ii; jj<bs2; jj+=bs) {
870                   bap[jj] = *value++;
871                 }
872               }
873             }
874           } else {
875             if (is == ADD_VALUES) {
876               for (ii=0; ii<bs; ii++,value+=stepval) {
877                 for (jj=0; jj<bs; jj++) {
878                   *bap++ += *value++;
879                 }
880               }
881             } else {
882               for (ii=0; ii<bs; ii++,value+=stepval) {
883                 for (jj=0; jj<bs; jj++) {
884                   *bap++  = *value++;
885                 }
886               }
887             }
888           }
889           goto noinsert2;
890         }
891       }
892       if (nonew == 1) goto noinsert2;
893       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
894       if (nrow >= rmax) {
895         /* there is no extra room in row, therefore enlarge */
896         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
897         MatScalar *new_a;
898 
899         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
900 
901         /* malloc new storage space */
902         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
903 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
904         new_j   = (int*)(new_a + bs2*new_nz);
905         new_i   = new_j + new_nz;
906 
907         /* copy over old data into new slots */
908         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
909         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
910         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
911         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
912         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
913         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
914         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
915         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
916         /* free up old matrix storage */
917         ierr = PetscFree(a->a);CHKERRQ(ierr);
918         if (!a->singlemalloc) {
919           ierr = PetscFree(a->i);CHKERRQ(ierr);
920           ierr = PetscFree(a->j);CHKERRQ(ierr);
921         }
922         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
923         a->singlemalloc = PETSC_TRUE;
924 
925         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
926         rmax = imax[row] = imax[row] + CHUNKSIZE;
927         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
928         a->maxnz += bs2*CHUNKSIZE;
929         a->reallocs++;
930         a->nz++;
931       }
932       N = nrow++ - 1;
933       /* shift up all the later entries in this row */
934       for (ii=N; ii>=i; ii--) {
935         rp[ii+1] = rp[ii];
936         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
937       }
938       if (N >= i) {
939         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
940       }
941       rp[i] = col;
942       bap   = ap +  bs2*i;
943       if (roworiented) {
944         for (ii=0; ii<bs; ii++,value+=stepval) {
945           for (jj=ii; jj<bs2; jj+=bs) {
946             bap[jj] = *value++;
947           }
948         }
949       } else {
950         for (ii=0; ii<bs; ii++,value+=stepval) {
951           for (jj=0; jj<bs; jj++) {
952             *bap++  = *value++;
953           }
954         }
955       }
956       noinsert2:;
957       low = i;
958     }
959     ailen[row] = nrow;
960   }
961   PetscFunctionReturn(0);
962 }
963 
964 #undef __FUNCT__
965 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
966 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
967 {
968   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
969   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
970   int        m = A->m,*ip,N,*ailen = a->ilen;
971   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
972   MatScalar  *aa = a->a,*ap;
973 #if defined(PETSC_HAVE_DSCPACK)
974   PetscTruth   flag;
975 #endif
976 
977   PetscFunctionBegin;
978   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
979 
980   if (m) rmax = ailen[0];
981   for (i=1; i<mbs; i++) {
982     /* move each row back by the amount of empty slots (fshift) before it*/
983     fshift += imax[i-1] - ailen[i-1];
984     rmax   = PetscMax(rmax,ailen[i]);
985     if (fshift) {
986       ip = aj + ai[i]; ap = aa + bs2*ai[i];
987       N = ailen[i];
988       for (j=0; j<N; j++) {
989         ip[j-fshift] = ip[j];
990         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
991       }
992     }
993     ai[i] = ai[i-1] + ailen[i-1];
994   }
995   if (mbs) {
996     fshift += imax[mbs-1] - ailen[mbs-1];
997     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
998   }
999   /* reset ilen and imax for each row */
1000   for (i=0; i<mbs; i++) {
1001     ailen[i] = imax[i] = ai[i+1] - ai[i];
1002   }
1003   a->nz = ai[mbs];
1004 
1005   /* diagonals may have moved, so kill the diagonal pointers */
1006   if (fshift && a->diag) {
1007     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1008     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
1009     a->diag = 0;
1010   }
1011   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1012   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1013   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1014   a->reallocs          = 0;
1015   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1016 
1017 #if defined(PETSC_HAVE_DSCPACK)
1018   ierr = PetscOptionsHasName(A->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr);
1019   if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); }
1020 #endif
1021 
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 
1026 
1027 /*
1028    This function returns an array of flags which indicate the locations of contiguous
1029    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1030    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1031    Assume: sizes should be long enough to hold all the values.
1032 */
1033 #undef __FUNCT__
1034 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1035 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1036 {
1037   int        i,j,k,row;
1038   PetscTruth flg;
1039 
1040   PetscFunctionBegin;
1041   for (i=0,j=0; i<n; j++) {
1042     row = idx[i];
1043     if (row%bs!=0) { /* Not the begining of a block */
1044       sizes[j] = 1;
1045       i++;
1046     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1047       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1048       i++;
1049     } else { /* Begining of the block, so check if the complete block exists */
1050       flg = PETSC_TRUE;
1051       for (k=1; k<bs; k++) {
1052         if (row+k != idx[i+k]) { /* break in the block */
1053           flg = PETSC_FALSE;
1054           break;
1055         }
1056       }
1057       if (flg == PETSC_TRUE) { /* No break in the bs */
1058         sizes[j] = bs;
1059         i+= bs;
1060       } else {
1061         sizes[j] = 1;
1062         i++;
1063       }
1064     }
1065   }
1066   *bs_max = j;
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 #undef __FUNCT__
1071 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1072 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1073 {
1074   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1075   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
1076   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1077   PetscScalar zero = 0.0;
1078   MatScalar   *aa;
1079 
1080   PetscFunctionBegin;
1081   /* Make a copy of the IS and  sort it */
1082   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1083   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1084 
1085   /* allocate memory for rows,sizes */
1086   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
1087   sizes = rows + is_n;
1088 
1089   /* copy IS values to rows, and sort them */
1090   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1091   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1092   if (baij->keepzeroedrows) {
1093     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1094     bs_max = is_n;
1095   } else {
1096     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1097   }
1098   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1099 
1100   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1101     row   = rows[j];
1102     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1103     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1104     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1105     if (sizes[i] == bs && !baij->keepzeroedrows) {
1106       if (diag) {
1107         if (baij->ilen[row/bs] > 0) {
1108           baij->ilen[row/bs]       = 1;
1109           baij->j[baij->i[row/bs]] = row/bs;
1110           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1111         }
1112         /* Now insert all the diagonal values for this bs */
1113         for (k=0; k<bs; k++) {
1114           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1115         }
1116       } else { /* (!diag) */
1117         baij->ilen[row/bs] = 0;
1118       } /* end (!diag) */
1119     } else { /* (sizes[i] != bs) */
1120 #if defined (PETSC_USE_DEBUG)
1121       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1122 #endif
1123       for (k=0; k<count; k++) {
1124         aa[0] =  zero;
1125         aa    += bs;
1126       }
1127       if (diag) {
1128         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1129       }
1130     }
1131   }
1132 
1133   ierr = PetscFree(rows);CHKERRQ(ierr);
1134   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 #undef __FUNCT__
1139 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1140 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
1141 {
1142   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1143   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1144   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1145   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1146   int         ridx,cidx,bs2=a->bs2,ierr;
1147   PetscTruth  roworiented=a->roworiented;
1148   MatScalar   *ap,value,*aa=a->a,*bap;
1149 
1150   PetscFunctionBegin;
1151   for (k=0; k<m; k++) { /* loop over added rows */
1152     row  = im[k]; brow = row/bs;
1153     if (row < 0) continue;
1154 #if defined(PETSC_USE_BOPT_g)
1155     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
1156 #endif
1157     rp   = aj + ai[brow];
1158     ap   = aa + bs2*ai[brow];
1159     rmax = imax[brow];
1160     nrow = ailen[brow];
1161     low  = 0;
1162     for (l=0; l<n; l++) { /* loop over added columns */
1163       if (in[l] < 0) continue;
1164 #if defined(PETSC_USE_BOPT_g)
1165       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
1166 #endif
1167       col = in[l]; bcol = col/bs;
1168       ridx = row % bs; cidx = col % bs;
1169       if (roworiented) {
1170         value = v[l + k*n];
1171       } else {
1172         value = v[k + l*m];
1173       }
1174       if (!sorted) low = 0; high = nrow;
1175       while (high-low > 7) {
1176         t = (low+high)/2;
1177         if (rp[t] > bcol) high = t;
1178         else              low  = t;
1179       }
1180       for (i=low; i<high; i++) {
1181         if (rp[i] > bcol) break;
1182         if (rp[i] == bcol) {
1183           bap  = ap +  bs2*i + bs*cidx + ridx;
1184           if (is == ADD_VALUES) *bap += value;
1185           else                  *bap  = value;
1186           goto noinsert1;
1187         }
1188       }
1189       if (nonew == 1) goto noinsert1;
1190       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1191       if (nrow >= rmax) {
1192         /* there is no extra room in row, therefore enlarge */
1193         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1194         MatScalar *new_a;
1195 
1196         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1197 
1198         /* Malloc new storage space */
1199         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1200 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1201         new_j   = (int*)(new_a + bs2*new_nz);
1202         new_i   = new_j + new_nz;
1203 
1204         /* copy over old data into new slots */
1205         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1206         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1207         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1208         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1209         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1210         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1211         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1212         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1213         /* free up old matrix storage */
1214         ierr = PetscFree(a->a);CHKERRQ(ierr);
1215         if (!a->singlemalloc) {
1216           ierr = PetscFree(a->i);CHKERRQ(ierr);
1217           ierr = PetscFree(a->j);CHKERRQ(ierr);
1218         }
1219         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1220         a->singlemalloc = PETSC_TRUE;
1221 
1222         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1223         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1224         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1225         a->maxnz += bs2*CHUNKSIZE;
1226         a->reallocs++;
1227         a->nz++;
1228       }
1229       N = nrow++ - 1;
1230       /* shift up all the later entries in this row */
1231       for (ii=N; ii>=i; ii--) {
1232         rp[ii+1] = rp[ii];
1233         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1234       }
1235       if (N>=i) {
1236         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1237       }
1238       rp[i]                      = bcol;
1239       ap[bs2*i + bs*cidx + ridx] = value;
1240       noinsert1:;
1241       low = i;
1242     }
1243     ailen[brow] = nrow;
1244   }
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1251 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1252 {
1253   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1254   Mat         outA;
1255   int         ierr;
1256   PetscTruth  row_identity,col_identity;
1257 
1258   PetscFunctionBegin;
1259   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1260   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1261   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1262   if (!row_identity || !col_identity) {
1263     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1264   }
1265 
1266   outA          = inA;
1267   inA->factor   = FACTOR_LU;
1268 
1269   if (!a->diag) {
1270     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1271   }
1272 
1273   a->row        = row;
1274   a->col        = col;
1275   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1276   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1277 
1278   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1279   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1280   PetscLogObjectParent(inA,a->icol);
1281 
1282   /*
1283       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1284       for ILU(0) factorization with natural ordering
1285   */
1286   if (a->bs < 8) {
1287     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1288   } else {
1289     if (!a->solve_work) {
1290       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1291       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1292     }
1293   }
1294 
1295   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1296 
1297   PetscFunctionReturn(0);
1298 }
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1301 int MatPrintHelp_SeqBAIJ(Mat A)
1302 {
1303   static PetscTruth called = PETSC_FALSE;
1304   MPI_Comm          comm = A->comm;
1305   int               ierr;
1306 
1307   PetscFunctionBegin;
1308   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1309   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1310   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 EXTERN_C_BEGIN
1315 #undef __FUNCT__
1316 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1317 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1318 {
1319   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1320   int         i,nz,nbs;
1321 
1322   PetscFunctionBegin;
1323   nz  = baij->maxnz/baij->bs2;
1324   nbs = baij->nbs;
1325   for (i=0; i<nz; i++) {
1326     baij->j[i] = indices[i];
1327   }
1328   baij->nz = nz;
1329   for (i=0; i<nbs; i++) {
1330     baij->ilen[i] = baij->imax[i];
1331   }
1332 
1333   PetscFunctionReturn(0);
1334 }
1335 EXTERN_C_END
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1339 /*@
1340     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1341        in the matrix.
1342 
1343   Input Parameters:
1344 +  mat - the SeqBAIJ matrix
1345 -  indices - the column indices
1346 
1347   Level: advanced
1348 
1349   Notes:
1350     This can be called if you have precomputed the nonzero structure of the
1351   matrix and want to provide it to the matrix object to improve the performance
1352   of the MatSetValues() operation.
1353 
1354     You MUST have set the correct numbers of nonzeros per row in the call to
1355   MatCreateSeqBAIJ().
1356 
1357     MUST be called before any calls to MatSetValues();
1358 
1359 @*/
1360 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1361 {
1362   int ierr,(*f)(Mat,int *);
1363 
1364   PetscFunctionBegin;
1365   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1366   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1367   if (f) {
1368     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1369   } else {
1370     SETERRQ(1,"Wrong type of matrix to set column indices");
1371   }
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1377 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1378 {
1379   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1380   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1381   PetscReal    atmp;
1382   PetscScalar  *x,zero = 0.0;
1383   MatScalar    *aa;
1384   int          ncols,brow,krow,kcol;
1385 
1386   PetscFunctionBegin;
1387   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1388   bs   = a->bs;
1389   aa   = a->a;
1390   ai   = a->i;
1391   aj   = a->j;
1392   mbs = a->mbs;
1393 
1394   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1395   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1396   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1397   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1398   for (i=0; i<mbs; i++) {
1399     ncols = ai[1] - ai[0]; ai++;
1400     brow  = bs*i;
1401     for (j=0; j<ncols; j++){
1402       /* bcol = bs*(*aj); */
1403       for (kcol=0; kcol<bs; kcol++){
1404         for (krow=0; krow<bs; krow++){
1405           atmp = PetscAbsScalar(*aa); aa++;
1406           row = brow + krow;    /* row index */
1407           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1408           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1409         }
1410       }
1411       aj++;
1412     }
1413   }
1414   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1420 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1421 {
1422   int        ierr;
1423 
1424   PetscFunctionBegin;
1425   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1431 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1432 {
1433   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1434   PetscFunctionBegin;
1435   *array = a->a;
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1441 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1442 {
1443   PetscFunctionBegin;
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 #include "petscblaslapack.h"
1448 #undef __FUNCT__
1449 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1450 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1451 {
1452   Mat_SeqBAIJ  *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1453   int          ierr,one=1,i,bs=y->bs,j,bs2;
1454 
1455   PetscFunctionBegin;
1456   if (str == SAME_NONZERO_PATTERN) {
1457     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1458   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1459     if (y->xtoy && y->XtoY != X) {
1460       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1461       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1462     }
1463     if (!y->xtoy) { /* get xtoy */
1464       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1465       y->XtoY = X;
1466     }
1467     bs2 = bs*bs;
1468     for (i=0; i<x->nz; i++) {
1469       j = 0;
1470       while (j < bs2){
1471         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1472         j++;
1473       }
1474     }
1475     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1476   } else {
1477     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1478   }
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 /* -------------------------------------------------------------------*/
1483 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1484        MatGetRow_SeqBAIJ,
1485        MatRestoreRow_SeqBAIJ,
1486        MatMult_SeqBAIJ_N,
1487        MatMultAdd_SeqBAIJ_N,
1488        MatMultTranspose_SeqBAIJ,
1489        MatMultTransposeAdd_SeqBAIJ,
1490        MatSolve_SeqBAIJ_N,
1491        0,
1492        0,
1493        0,
1494        MatLUFactor_SeqBAIJ,
1495        0,
1496        0,
1497        MatTranspose_SeqBAIJ,
1498        MatGetInfo_SeqBAIJ,
1499        MatEqual_SeqBAIJ,
1500        MatGetDiagonal_SeqBAIJ,
1501        MatDiagonalScale_SeqBAIJ,
1502        MatNorm_SeqBAIJ,
1503        0,
1504        MatAssemblyEnd_SeqBAIJ,
1505        0,
1506        MatSetOption_SeqBAIJ,
1507        MatZeroEntries_SeqBAIJ,
1508        MatZeroRows_SeqBAIJ,
1509        MatLUFactorSymbolic_SeqBAIJ,
1510        MatLUFactorNumeric_SeqBAIJ_N,
1511        0,
1512        0,
1513        MatSetUpPreallocation_SeqBAIJ,
1514        MatILUFactorSymbolic_SeqBAIJ,
1515        0,
1516        MatGetArray_SeqBAIJ,
1517        MatRestoreArray_SeqBAIJ,
1518        MatDuplicate_SeqBAIJ,
1519        0,
1520        0,
1521        MatILUFactor_SeqBAIJ,
1522        0,
1523        MatAXPY_SeqBAIJ,
1524        MatGetSubMatrices_SeqBAIJ,
1525        MatIncreaseOverlap_SeqBAIJ,
1526        MatGetValues_SeqBAIJ,
1527        0,
1528        MatPrintHelp_SeqBAIJ,
1529        MatScale_SeqBAIJ,
1530        0,
1531        0,
1532        0,
1533        MatGetBlockSize_SeqBAIJ,
1534        MatGetRowIJ_SeqBAIJ,
1535        MatRestoreRowIJ_SeqBAIJ,
1536        0,
1537        0,
1538        0,
1539        0,
1540        0,
1541        0,
1542        MatSetValuesBlocked_SeqBAIJ,
1543        MatGetSubMatrix_SeqBAIJ,
1544        MatDestroy_SeqBAIJ,
1545        MatView_SeqBAIJ,
1546        MatGetPetscMaps_Petsc,
1547        0,
1548        0,
1549        0,
1550        0,
1551        0,
1552        0,
1553        MatGetRowMax_SeqBAIJ,
1554        MatConvert_Basic};
1555 
1556 EXTERN_C_BEGIN
1557 #undef __FUNCT__
1558 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1559 int MatStoreValues_SeqBAIJ(Mat mat)
1560 {
1561   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1562   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1563   int         ierr;
1564 
1565   PetscFunctionBegin;
1566   if (aij->nonew != 1) {
1567     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1568   }
1569 
1570   /* allocate space for values if not already there */
1571   if (!aij->saved_values) {
1572     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1573   }
1574 
1575   /* copy values over */
1576   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1577   PetscFunctionReturn(0);
1578 }
1579 EXTERN_C_END
1580 
1581 EXTERN_C_BEGIN
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1584 int MatRetrieveValues_SeqBAIJ(Mat mat)
1585 {
1586   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1587   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1588 
1589   PetscFunctionBegin;
1590   if (aij->nonew != 1) {
1591     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1592   }
1593   if (!aij->saved_values) {
1594     SETERRQ(1,"Must call MatStoreValues(A);first");
1595   }
1596 
1597   /* copy values over */
1598   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1599   PetscFunctionReturn(0);
1600 }
1601 EXTERN_C_END
1602 
1603 EXTERN_C_BEGIN
1604 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1605 EXTERN_C_END
1606 
1607 EXTERN_C_BEGIN
1608 #undef __FUNCT__
1609 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
1610 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz)
1611 {
1612   Mat_SeqBAIJ *b;
1613   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1614   PetscTruth  flg;
1615 
1616   PetscFunctionBegin;
1617 
1618   B->preallocated = PETSC_TRUE;
1619   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1620   if (nnz && newbs != bs) {
1621     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1622   }
1623   bs   = newbs;
1624 
1625   mbs  = B->m/bs;
1626   nbs  = B->n/bs;
1627   bs2  = bs*bs;
1628 
1629   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1630     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1631   }
1632 
1633   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1634   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1635   if (nnz) {
1636     for (i=0; i<mbs; i++) {
1637       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1638       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1639     }
1640   }
1641 
1642   b       = (Mat_SeqBAIJ*)B->data;
1643   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1644   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1645   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1646   if (!flg) {
1647     switch (bs) {
1648     case 1:
1649       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1650       B->ops->mult            = MatMult_SeqBAIJ_1;
1651       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1652       break;
1653     case 2:
1654       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1655       B->ops->mult            = MatMult_SeqBAIJ_2;
1656       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1657       break;
1658     case 3:
1659       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1660       B->ops->mult            = MatMult_SeqBAIJ_3;
1661       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1662       break;
1663     case 4:
1664       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1665       B->ops->mult            = MatMult_SeqBAIJ_4;
1666       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1667       break;
1668     case 5:
1669       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1670       B->ops->mult            = MatMult_SeqBAIJ_5;
1671       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1672       break;
1673     case 6:
1674       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1675       B->ops->mult            = MatMult_SeqBAIJ_6;
1676       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1677       break;
1678     case 7:
1679       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1680       B->ops->mult            = MatMult_SeqBAIJ_7;
1681       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1682       break;
1683     default:
1684       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1685       B->ops->mult            = MatMult_SeqBAIJ_N;
1686       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1687       break;
1688     }
1689   }
1690   b->bs      = bs;
1691   b->mbs     = mbs;
1692   b->nbs     = nbs;
1693   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1694   if (!nnz) {
1695     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1696     else if (nz <= 0)        nz = 1;
1697     for (i=0; i<mbs; i++) b->imax[i] = nz;
1698     nz = nz*mbs;
1699   } else {
1700     nz = 0;
1701     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1702   }
1703 
1704   /* allocate the matrix space */
1705   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1706   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1707   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1708   b->j  = (int*)(b->a + nz*bs2);
1709   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1710   b->i  = b->j + nz;
1711   b->singlemalloc = PETSC_TRUE;
1712 
1713   b->i[0] = 0;
1714   for (i=1; i<mbs+1; i++) {
1715     b->i[i] = b->i[i-1] + b->imax[i-1];
1716   }
1717 
1718   /* b->ilen will count nonzeros in each block row so far. */
1719   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1720   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1721   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1722 
1723   b->bs               = bs;
1724   b->bs2              = bs2;
1725   b->mbs              = mbs;
1726   b->nz               = 0;
1727   b->maxnz            = nz*bs2;
1728   B->info.nz_unneeded = (PetscReal)b->maxnz;
1729   PetscFunctionReturn(0);
1730 }
1731 EXTERN_C_END
1732 
1733 EXTERN_C_BEGIN
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatCreate_SeqBAIJ"
1736 int MatCreate_SeqBAIJ(Mat B)
1737 {
1738   int         ierr,size;
1739   Mat_SeqBAIJ *b;
1740 
1741   PetscFunctionBegin;
1742   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1743   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1744 
1745   B->m = B->M = PetscMax(B->m,B->M);
1746   B->n = B->N = PetscMax(B->n,B->N);
1747   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1748   B->data = (void*)b;
1749   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1750   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1751   B->factor           = 0;
1752   B->lupivotthreshold = 1.0;
1753   B->mapping          = 0;
1754   b->row              = 0;
1755   b->col              = 0;
1756   b->icol             = 0;
1757   b->reallocs         = 0;
1758   b->saved_values     = 0;
1759 #if defined(PETSC_USE_MAT_SINGLE)
1760   b->setvalueslen     = 0;
1761   b->setvaluescopy    = PETSC_NULL;
1762 #endif
1763 
1764   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1765   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1766 
1767   b->sorted           = PETSC_FALSE;
1768   b->roworiented      = PETSC_TRUE;
1769   b->nonew            = 0;
1770   b->diag             = 0;
1771   b->solve_work       = 0;
1772   b->mult_work        = 0;
1773   B->spptr            = 0;
1774   B->info.nz_unneeded = (PetscReal)b->maxnz;
1775   b->keepzeroedrows   = PETSC_FALSE;
1776   b->xtoy              = 0;
1777   b->XtoY              = 0;
1778 
1779   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1780                                      "MatStoreValues_SeqBAIJ",
1781                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1782   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1783                                      "MatRetrieveValues_SeqBAIJ",
1784                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1785   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1786                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1787                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1788   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
1789                                      "MatConvert_SeqBAIJ_SeqAIJ",
1790                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1791   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
1792                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
1793                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
1794   PetscFunctionReturn(0);
1795 }
1796 EXTERN_C_END
1797 
1798 #undef __FUNCT__
1799 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1800 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1801 {
1802   Mat         C;
1803   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1804   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1805 
1806   PetscFunctionBegin;
1807   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1808 
1809   *B = 0;
1810   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1811   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1812   c    = (Mat_SeqBAIJ*)C->data;
1813 
1814   c->bs         = a->bs;
1815   c->bs2        = a->bs2;
1816   c->mbs        = a->mbs;
1817   c->nbs        = a->nbs;
1818   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1819 
1820   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1821   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1822   for (i=0; i<mbs; i++) {
1823     c->imax[i] = a->imax[i];
1824     c->ilen[i] = a->ilen[i];
1825   }
1826 
1827   /* allocate the matrix space */
1828   c->singlemalloc = PETSC_TRUE;
1829   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1830   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1831   c->j = (int*)(c->a + nz*bs2);
1832   c->i = c->j + nz;
1833   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1834   if (mbs > 0) {
1835     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1836     if (cpvalues == MAT_COPY_VALUES) {
1837       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1838     } else {
1839       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1840     }
1841   }
1842 
1843   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1844   c->sorted      = a->sorted;
1845   c->roworiented = a->roworiented;
1846   c->nonew       = a->nonew;
1847 
1848   if (a->diag) {
1849     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1850     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1851     for (i=0; i<mbs; i++) {
1852       c->diag[i] = a->diag[i];
1853     }
1854   } else c->diag        = 0;
1855   c->nz                 = a->nz;
1856   c->maxnz              = a->maxnz;
1857   c->solve_work         = 0;
1858   C->spptr              = 0;     /* Dangerous -I'm throwing away a->spptr */
1859   c->mult_work          = 0;
1860   C->preallocated       = PETSC_TRUE;
1861   C->assembled          = PETSC_TRUE;
1862   *B = C;
1863   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1864   PetscFunctionReturn(0);
1865 }
1866 
1867 EXTERN_C_BEGIN
1868 #undef __FUNCT__
1869 #define __FUNCT__ "MatLoad_SeqBAIJ"
1870 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1871 {
1872   Mat_SeqBAIJ  *a;
1873   Mat          B;
1874   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1875   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1876   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1877   int          *masked,nmask,tmp,bs2,ishift;
1878   PetscScalar  *aa;
1879   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1880 #if defined(PETSC_HAVE_DSCPACK)
1881   PetscTruth   flag;
1882 #endif
1883 
1884   PetscFunctionBegin;
1885   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1886   bs2  = bs*bs;
1887 
1888   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1889   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1890   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1891   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1892   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1893   M = header[1]; N = header[2]; nz = header[3];
1894 
1895   if (header[3] < 0) {
1896     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1897   }
1898 
1899   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1900 
1901   /*
1902      This code adds extra rows to make sure the number of rows is
1903     divisible by the blocksize
1904   */
1905   mbs        = M/bs;
1906   extra_rows = bs - M + bs*(mbs);
1907   if (extra_rows == bs) extra_rows = 0;
1908   else                  mbs++;
1909   if (extra_rows) {
1910     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1911   }
1912 
1913   /* read in row lengths */
1914   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1915   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1916   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1917 
1918   /* read in column indices */
1919   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1920   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1921   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1922 
1923   /* loop over row lengths determining block row lengths */
1924   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1925   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1926   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1927   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1928   masked   = mask + mbs;
1929   rowcount = 0; nzcount = 0;
1930   for (i=0; i<mbs; i++) {
1931     nmask = 0;
1932     for (j=0; j<bs; j++) {
1933       kmax = rowlengths[rowcount];
1934       for (k=0; k<kmax; k++) {
1935         tmp = jj[nzcount++]/bs;
1936         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1937       }
1938       rowcount++;
1939     }
1940     browlengths[i] += nmask;
1941     /* zero out the mask elements we set */
1942     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1943   }
1944 
1945   /* create our matrix */
1946   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1947   B = *A;
1948   a = (Mat_SeqBAIJ*)B->data;
1949 
1950   /* set matrix "i" values */
1951   a->i[0] = 0;
1952   for (i=1; i<= mbs; i++) {
1953     a->i[i]      = a->i[i-1] + browlengths[i-1];
1954     a->ilen[i-1] = browlengths[i-1];
1955   }
1956   a->nz         = 0;
1957   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1958 
1959   /* read in nonzero values */
1960   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1961   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1962   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1963 
1964   /* set "a" and "j" values into matrix */
1965   nzcount = 0; jcount = 0;
1966   for (i=0; i<mbs; i++) {
1967     nzcountb = nzcount;
1968     nmask    = 0;
1969     for (j=0; j<bs; j++) {
1970       kmax = rowlengths[i*bs+j];
1971       for (k=0; k<kmax; k++) {
1972         tmp = jj[nzcount++]/bs;
1973 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1974       }
1975     }
1976     /* sort the masked values */
1977     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1978 
1979     /* set "j" values into matrix */
1980     maskcount = 1;
1981     for (j=0; j<nmask; j++) {
1982       a->j[jcount++]  = masked[j];
1983       mask[masked[j]] = maskcount++;
1984     }
1985     /* set "a" values into matrix */
1986     ishift = bs2*a->i[i];
1987     for (j=0; j<bs; j++) {
1988       kmax = rowlengths[i*bs+j];
1989       for (k=0; k<kmax; k++) {
1990         tmp       = jj[nzcountb]/bs ;
1991         block     = mask[tmp] - 1;
1992         point     = jj[nzcountb] - bs*tmp;
1993         idx       = ishift + bs2*block + j + bs*point;
1994         a->a[idx] = (MatScalar)aa[nzcountb++];
1995       }
1996     }
1997     /* zero out the mask elements we set */
1998     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1999   }
2000   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2001 
2002   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2003   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2004   ierr = PetscFree(aa);CHKERRQ(ierr);
2005   ierr = PetscFree(jj);CHKERRQ(ierr);
2006   ierr = PetscFree(mask);CHKERRQ(ierr);
2007 
2008   B->assembled = PETSC_TRUE;
2009 
2010 #if defined(PETSC_HAVE_DSCPACK)
2011   ierr = PetscOptionsHasName(B->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr);
2012   if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(B);CHKERRQ(ierr); }
2013 #endif
2014 
2015   ierr = MatView_Private(B);CHKERRQ(ierr);
2016   PetscFunctionReturn(0);
2017 }
2018 EXTERN_C_END
2019 
2020 #undef __FUNCT__
2021 #define __FUNCT__ "MatCreateSeqBAIJ"
2022 /*@C
2023    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2024    compressed row) format.  For good matrix assembly performance the
2025    user should preallocate the matrix storage by setting the parameter nz
2026    (or the array nnz).  By setting these parameters accurately, performance
2027    during matrix assembly can be increased by more than a factor of 50.
2028 
2029    Collective on MPI_Comm
2030 
2031    Input Parameters:
2032 +  comm - MPI communicator, set to PETSC_COMM_SELF
2033 .  bs - size of block
2034 .  m - number of rows
2035 .  n - number of columns
2036 .  nz - number of nonzero blocks  per block row (same for all rows)
2037 -  nnz - array containing the number of nonzero blocks in the various block rows
2038          (possibly different for each block row) or PETSC_NULL
2039 
2040    Output Parameter:
2041 .  A - the matrix
2042 
2043    Options Database Keys:
2044 .   -mat_no_unroll - uses code that does not unroll the loops in the
2045                      block calculations (much slower)
2046 .    -mat_block_size - size of the blocks to use
2047 
2048    Level: intermediate
2049 
2050    Notes:
2051    A nonzero block is any block that as 1 or more nonzeros in it
2052 
2053    The block AIJ format is fully compatible with standard Fortran 77
2054    storage.  That is, the stored row and column indices can begin at
2055    either one (as in Fortran) or zero.  See the users' manual for details.
2056 
2057    Specify the preallocated storage with either nz or nnz (not both).
2058    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2059    allocation.  For additional details, see the users manual chapter on
2060    matrices.
2061 
2062 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2063 @*/
2064 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
2065 {
2066   int ierr;
2067 
2068   PetscFunctionBegin;
2069   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2070   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2071   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 #undef __FUNCT__
2076 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2077 /*@C
2078    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2079    per row in the matrix. For good matrix assembly performance the
2080    user should preallocate the matrix storage by setting the parameter nz
2081    (or the array nnz).  By setting these parameters accurately, performance
2082    during matrix assembly can be increased by more than a factor of 50.
2083 
2084    Collective on MPI_Comm
2085 
2086    Input Parameters:
2087 +  A - the matrix
2088 .  bs - size of block
2089 .  nz - number of block nonzeros per block row (same for all rows)
2090 -  nnz - array containing the number of block nonzeros in the various block rows
2091          (possibly different for each block row) or PETSC_NULL
2092 
2093    Options Database Keys:
2094 .   -mat_no_unroll - uses code that does not unroll the loops in the
2095                      block calculations (much slower)
2096 .    -mat_block_size - size of the blocks to use
2097 
2098    Level: intermediate
2099 
2100    Notes:
2101    The block AIJ format is fully compatible with standard Fortran 77
2102    storage.  That is, the stored row and column indices can begin at
2103    either one (as in Fortran) or zero.  See the users' manual for details.
2104 
2105    Specify the preallocated storage with either nz or nnz (not both).
2106    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2107    allocation.  For additional details, see the users manual chapter on
2108    matrices.
2109 
2110 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2111 @*/
2112 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[])
2113 {
2114   int ierr,(*f)(Mat,int,int,const int[]);
2115 
2116   PetscFunctionBegin;
2117   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2118   if (f) {
2119     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2120   }
2121   PetscFunctionReturn(0);
2122 }
2123