xref: /petsc/src/mat/impls/baij/seq/baij.c (revision cd0089b03c240696c596d4fef40bc47e257771ec)
1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "petscsys.h"                     /*I "petscmat.h" I*/
11 
12 /*
13     Special version for Fun3d sequential benchmark
14 */
15 #if defined(PETSC_HAVE_FORTRAN_CAPS)
16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
18 #define matsetvaluesblocked4_ matsetvaluesblocked4
19 #endif
20 
21 EXTERN_C_BEGIN
22 #undef __FUNCT__
23 #define __FUNCT__ "matsetvaluesblocked4_"
24 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[])
25 {
26   Mat               A = *AA;
27   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
28   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
29   int               *ai=a->i,*ailen=a->ilen;
30   int               *aj=a->j,stepval;
31   const PetscScalar *value = v;
32   MatScalar         *ap,*aa = a->a,*bap;
33 
34   PetscFunctionBegin;
35   stepval = (n-1)*4;
36   for (k=0; k<m; k++) { /* loop over added rows */
37     row  = im[k];
38     rp   = aj + ai[row];
39     ap   = aa + 16*ai[row];
40     nrow = ailen[row];
41     low  = 0;
42     for (l=0; l<n; l++) { /* loop over added columns */
43       col = in[l];
44       value = v + k*(stepval+4)*4 + l*4;
45       low = 0; high = nrow;
46       while (high-low > 7) {
47         t = (low+high)/2;
48         if (rp[t] > col) high = t;
49         else             low  = t;
50       }
51       for (i=low; i<high; i++) {
52         if (rp[i] > col) break;
53         if (rp[i] == col) {
54           bap  = ap +  16*i;
55           for (ii=0; ii<4; ii++,value+=stepval) {
56             for (jj=ii; jj<16; jj+=4) {
57               bap[jj] += *value++;
58             }
59           }
60           goto noinsert2;
61         }
62       }
63       N = nrow++ - 1;
64       /* shift up all the later entries in this row */
65       for (ii=N; ii>=i; ii--) {
66         rp[ii+1] = rp[ii];
67         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
68       }
69       if (N >= i) {
70         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
71       }
72       rp[i] = col;
73       bap   = ap +  16*i;
74       for (ii=0; ii<4; ii++,value+=stepval) {
75         for (jj=ii; jj<16; jj+=4) {
76           bap[jj] = *value++;
77         }
78       }
79       noinsert2:;
80       low = i;
81     }
82     ailen[row] = nrow;
83   }
84 }
85 EXTERN_C_END
86 
87 #if defined(PETSC_HAVE_FORTRAN_CAPS)
88 #define matsetvalues4_ MATSETVALUES4
89 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
90 #define matsetvalues4_ matsetvalues4
91 #endif
92 
93 EXTERN_C_BEGIN
94 #undef __FUNCT__
95 #define __FUNCT__ "MatSetValues4_"
96 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
97 {
98   Mat         A = *AA;
99   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
100   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
101   int         *ai=a->i,*ailen=a->ilen;
102   int         *aj=a->j,brow,bcol;
103   int         ridx,cidx;
104   MatScalar   *ap,value,*aa=a->a,*bap;
105 
106   PetscFunctionBegin;
107   for (k=0; k<m; k++) { /* loop over added rows */
108     row  = im[k]; brow = row/4;
109     rp   = aj + ai[brow];
110     ap   = aa + 16*ai[brow];
111     nrow = ailen[brow];
112     low  = 0;
113     for (l=0; l<n; l++) { /* loop over added columns */
114       col = in[l]; bcol = col/4;
115       ridx = row % 4; cidx = col % 4;
116       value = v[l + k*n];
117       low = 0; high = nrow;
118       while (high-low > 7) {
119         t = (low+high)/2;
120         if (rp[t] > bcol) high = t;
121         else              low  = t;
122       }
123       for (i=low; i<high; i++) {
124         if (rp[i] > bcol) break;
125         if (rp[i] == bcol) {
126           bap  = ap +  16*i + 4*cidx + ridx;
127           *bap += value;
128           goto noinsert1;
129         }
130       }
131       N = nrow++ - 1;
132       /* shift up all the later entries in this row */
133       for (ii=N; ii>=i; ii--) {
134         rp[ii+1] = rp[ii];
135         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
136       }
137       if (N>=i) {
138         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
139       }
140       rp[i]                    = bcol;
141       ap[16*i + 4*cidx + ridx] = value;
142       noinsert1:;
143       low = i;
144     }
145     ailen[brow] = nrow;
146   }
147 }
148 EXTERN_C_END
149 
150 /*  UGLY, ugly, ugly
151    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
152    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
153    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
154    converts the entries into single precision and then calls ..._MatScalar() to put them
155    into the single precision data structures.
156 */
157 #if defined(PETSC_USE_MAT_SINGLE)
158 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
159 #else
160 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
161 #endif
162 
163 #define CHUNKSIZE  10
164 
165 /*
166      Checks for missing diagonals
167 */
168 #undef __FUNCT__
169 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
170 int MatMissingDiagonal_SeqBAIJ(Mat A)
171 {
172   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
173   int         *diag,*jj = a->j,i,ierr;
174 
175   PetscFunctionBegin;
176   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
177   diag = a->diag;
178   for (i=0; i<a->mbs; i++) {
179     if (jj[diag[i]] != i) {
180       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
181     }
182   }
183   PetscFunctionReturn(0);
184 }
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
188 int MatMarkDiagonal_SeqBAIJ(Mat A)
189 {
190   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
191   int         i,j,*diag,m = a->mbs,ierr;
192 
193   PetscFunctionBegin;
194   if (a->diag) PetscFunctionReturn(0);
195 
196   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
197   PetscLogObjectMemory(A,(m+1)*sizeof(int));
198   for (i=0; i<m; i++) {
199     diag[i] = a->i[i+1];
200     for (j=a->i[i]; j<a->i[i+1]; j++) {
201       if (a->j[j] == i) {
202         diag[i] = j;
203         break;
204       }
205     }
206   }
207   a->diag = diag;
208   PetscFunctionReturn(0);
209 }
210 
211 
212 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
216 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
217 {
218   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
219   int         ierr,n = a->mbs,i;
220 
221   PetscFunctionBegin;
222   *nn = n;
223   if (!ia) PetscFunctionReturn(0);
224   if (symmetric) {
225     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
226   } else if (oshift == 1) {
227     /* temporarily add 1 to i and j indices */
228     int nz = a->i[n];
229     for (i=0; i<nz; i++) a->j[i]++;
230     for (i=0; i<n+1; i++) a->i[i]++;
231     *ia = a->i; *ja = a->j;
232   } else {
233     *ia = a->i; *ja = a->j;
234   }
235 
236   PetscFunctionReturn(0);
237 }
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
241 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
242 {
243   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
244   int         i,n = a->mbs,ierr;
245 
246   PetscFunctionBegin;
247   if (!ia) PetscFunctionReturn(0);
248   if (symmetric) {
249     ierr = PetscFree(*ia);CHKERRQ(ierr);
250     ierr = PetscFree(*ja);CHKERRQ(ierr);
251   } else if (oshift == 1) {
252     int nz = a->i[n]-1;
253     for (i=0; i<nz; i++) a->j[i]--;
254     for (i=0; i<n+1; i++) a->i[i]--;
255   }
256   PetscFunctionReturn(0);
257 }
258 
259 #undef __FUNCT__
260 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
261 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
262 {
263   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
264 
265   PetscFunctionBegin;
266   *bs = baij->bs;
267   PetscFunctionReturn(0);
268 }
269 
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatDestroy_SeqBAIJ"
273 int MatDestroy_SeqBAIJ(Mat A)
274 {
275   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
276   int         ierr;
277 
278   PetscFunctionBegin;
279 #if defined(PETSC_USE_LOG)
280   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
281 #endif
282   ierr = PetscFree(a->a);CHKERRQ(ierr);
283   if (!a->singlemalloc) {
284     ierr = PetscFree(a->i);CHKERRQ(ierr);
285     ierr = PetscFree(a->j);CHKERRQ(ierr);
286   }
287   if (a->row) {
288     ierr = ISDestroy(a->row);CHKERRQ(ierr);
289   }
290   if (a->col) {
291     ierr = ISDestroy(a->col);CHKERRQ(ierr);
292   }
293   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
294   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
295   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
296   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
297   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
298   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
299   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
300 #if defined(PETSC_USE_MAT_SINGLE)
301   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
302 #endif
303   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
304 
305   ierr = PetscFree(a);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatSetOption_SeqBAIJ"
311 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
312 {
313   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
314 
315   PetscFunctionBegin;
316   switch (op) {
317   case MAT_ROW_ORIENTED:
318     a->roworiented    = PETSC_TRUE;
319     break;
320   case MAT_COLUMN_ORIENTED:
321     a->roworiented    = PETSC_FALSE;
322     break;
323   case MAT_COLUMNS_SORTED:
324     a->sorted         = PETSC_TRUE;
325     break;
326   case MAT_COLUMNS_UNSORTED:
327     a->sorted         = PETSC_FALSE;
328     break;
329   case MAT_KEEP_ZEROED_ROWS:
330     a->keepzeroedrows = PETSC_TRUE;
331     break;
332   case MAT_NO_NEW_NONZERO_LOCATIONS:
333     a->nonew          = 1;
334     break;
335   case MAT_NEW_NONZERO_LOCATION_ERR:
336     a->nonew          = -1;
337     break;
338   case MAT_NEW_NONZERO_ALLOCATION_ERR:
339     a->nonew          = -2;
340     break;
341   case MAT_YES_NEW_NONZERO_LOCATIONS:
342     a->nonew          = 0;
343     break;
344   case MAT_ROWS_SORTED:
345   case MAT_ROWS_UNSORTED:
346   case MAT_YES_NEW_DIAGONALS:
347   case MAT_IGNORE_OFF_PROC_ENTRIES:
348   case MAT_USE_HASH_TABLE:
349     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
350     break;
351   case MAT_NO_NEW_DIAGONALS:
352     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
353   case MAT_SYMMETRIC:
354   case MAT_STRUCTURALLY_SYMMETRIC:
355   case MAT_NOT_SYMMETRIC:
356   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
357   case MAT_HERMITIAN:
358   case MAT_NOT_HERMITIAN:
359   case MAT_SYMMETRY_ETERNAL:
360   case MAT_NOT_SYMMETRY_ETERNAL:
361     break;
362   default:
363     SETERRQ(PETSC_ERR_SUP,"unknown option");
364   }
365   PetscFunctionReturn(0);
366 }
367 
368 #undef __FUNCT__
369 #define __FUNCT__ "MatGetRow_SeqBAIJ"
370 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
371 {
372   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
373   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
374   MatScalar    *aa,*aa_i;
375   PetscScalar  *v_i;
376 
377   PetscFunctionBegin;
378   bs  = a->bs;
379   ai  = a->i;
380   aj  = a->j;
381   aa  = a->a;
382   bs2 = a->bs2;
383 
384   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row);
385 
386   bn  = row/bs;   /* Block number */
387   bp  = row % bs; /* Block Position */
388   M   = ai[bn+1] - ai[bn];
389   *nz = bs*M;
390 
391   if (v) {
392     *v = 0;
393     if (*nz) {
394       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
395       for (i=0; i<M; i++) { /* for each block in the block row */
396         v_i  = *v + i*bs;
397         aa_i = aa + bs2*(ai[bn] + i);
398         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
399       }
400     }
401   }
402 
403   if (idx) {
404     *idx = 0;
405     if (*nz) {
406       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
407       for (i=0; i<M; i++) { /* for each block in the block row */
408         idx_i = *idx + i*bs;
409         itmp  = bs*aj[ai[bn] + i];
410         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
411       }
412     }
413   }
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNCT__
418 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
419 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
420 {
421   int ierr;
422 
423   PetscFunctionBegin;
424   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
425   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatTranspose_SeqBAIJ"
431 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
432 {
433   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
434   Mat         C;
435   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
436   int         *rows,*cols,bs2=a->bs2;
437   PetscScalar *array;
438 
439   PetscFunctionBegin;
440   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
441   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
442   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
443 
444 #if defined(PETSC_USE_MAT_SINGLE)
445   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
446   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
447 #else
448   array = a->a;
449 #endif
450 
451   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
452   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
453   ierr = PetscFree(col);CHKERRQ(ierr);
454   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
455   cols = rows + bs;
456   for (i=0; i<mbs; i++) {
457     cols[0] = i*bs;
458     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
459     len = ai[i+1] - ai[i];
460     for (j=0; j<len; j++) {
461       rows[0] = (*aj++)*bs;
462       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
463       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
464       array += bs2;
465     }
466   }
467   ierr = PetscFree(rows);CHKERRQ(ierr);
468 #if defined(PETSC_USE_MAT_SINGLE)
469   ierr = PetscFree(array);
470 #endif
471 
472   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
473   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
474 
475   if (B) {
476     *B = C;
477   } else {
478     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
479   }
480   PetscFunctionReturn(0);
481 }
482 
483 #undef __FUNCT__
484 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
485 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
486 {
487   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
488   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
489   PetscScalar *aa;
490   FILE        *file;
491 
492   PetscFunctionBegin;
493   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
494   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
495   col_lens[0] = MAT_FILE_COOKIE;
496 
497   col_lens[1] = A->m;
498   col_lens[2] = A->n;
499   col_lens[3] = a->nz*bs2;
500 
501   /* store lengths of each row and write (including header) to file */
502   count = 0;
503   for (i=0; i<a->mbs; i++) {
504     for (j=0; j<bs; j++) {
505       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
506     }
507   }
508   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
509   ierr = PetscFree(col_lens);CHKERRQ(ierr);
510 
511   /* store column indices (zero start index) */
512   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
513   count = 0;
514   for (i=0; i<a->mbs; i++) {
515     for (j=0; j<bs; j++) {
516       for (k=a->i[i]; k<a->i[i+1]; k++) {
517         for (l=0; l<bs; l++) {
518           jj[count++] = bs*a->j[k] + l;
519         }
520       }
521     }
522   }
523   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
524   ierr = PetscFree(jj);CHKERRQ(ierr);
525 
526   /* store nonzero values */
527   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
528   count = 0;
529   for (i=0; i<a->mbs; i++) {
530     for (j=0; j<bs; j++) {
531       for (k=a->i[i]; k<a->i[i+1]; k++) {
532         for (l=0; l<bs; l++) {
533           aa[count++] = a->a[bs2*k + l*bs + j];
534         }
535       }
536     }
537   }
538   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
539   ierr = PetscFree(aa);CHKERRQ(ierr);
540 
541   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
542   if (file) {
543     fprintf(file,"-matload_block_size %d\n",a->bs);
544   }
545   PetscFunctionReturn(0);
546 }
547 
548 #undef __FUNCT__
549 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
550 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
551 {
552   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
553   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
554   PetscViewerFormat format;
555 
556   PetscFunctionBegin;
557   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
558   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
559     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
560   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
561     Mat aij;
562     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
563     ierr = MatView(aij,viewer);CHKERRQ(ierr);
564     ierr = MatDestroy(aij);CHKERRQ(ierr);
565   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
566      PetscFunctionReturn(0);
567   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
568     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
569     for (i=0; i<a->mbs; i++) {
570       for (j=0; j<bs; j++) {
571         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
572         for (k=a->i[i]; k<a->i[i+1]; k++) {
573           for (l=0; l<bs; l++) {
574 #if defined(PETSC_USE_COMPLEX)
575             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
576               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
577                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
578             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
579               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
580                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
581             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
582               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
583             }
584 #else
585             if (a->a[bs2*k + l*bs + j] != 0.0) {
586               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
587             }
588 #endif
589           }
590         }
591         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
592       }
593     }
594     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
595   } else {
596     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
597     for (i=0; i<a->mbs; i++) {
598       for (j=0; j<bs; j++) {
599         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
600         for (k=a->i[i]; k<a->i[i+1]; k++) {
601           for (l=0; l<bs; l++) {
602 #if defined(PETSC_USE_COMPLEX)
603             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
604               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
605                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
606             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
607               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
608                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
609             } else {
610               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
611             }
612 #else
613             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
614 #endif
615           }
616         }
617         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
618       }
619     }
620     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
621   }
622   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
628 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
629 {
630   Mat          A = (Mat) Aa;
631   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
632   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
633   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
634   MatScalar    *aa;
635   PetscViewer  viewer;
636 
637   PetscFunctionBegin;
638 
639   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
640   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
641 
642   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
643 
644   /* loop over matrix elements drawing boxes */
645   color = PETSC_DRAW_BLUE;
646   for (i=0,row=0; i<mbs; i++,row+=bs) {
647     for (j=a->i[i]; j<a->i[i+1]; j++) {
648       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
649       x_l = a->j[j]*bs; x_r = x_l + 1.0;
650       aa = a->a + j*bs2;
651       for (k=0; k<bs; k++) {
652         for (l=0; l<bs; l++) {
653           if (PetscRealPart(*aa++) >=  0.) continue;
654           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
655         }
656       }
657     }
658   }
659   color = PETSC_DRAW_CYAN;
660   for (i=0,row=0; i<mbs; i++,row+=bs) {
661     for (j=a->i[i]; j<a->i[i+1]; j++) {
662       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
663       x_l = a->j[j]*bs; x_r = x_l + 1.0;
664       aa = a->a + j*bs2;
665       for (k=0; k<bs; k++) {
666         for (l=0; l<bs; l++) {
667           if (PetscRealPart(*aa++) != 0.) continue;
668           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
669         }
670       }
671     }
672   }
673 
674   color = PETSC_DRAW_RED;
675   for (i=0,row=0; i<mbs; i++,row+=bs) {
676     for (j=a->i[i]; j<a->i[i+1]; j++) {
677       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
678       x_l = a->j[j]*bs; x_r = x_l + 1.0;
679       aa = a->a + j*bs2;
680       for (k=0; k<bs; k++) {
681         for (l=0; l<bs; l++) {
682           if (PetscRealPart(*aa++) <= 0.) continue;
683           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
684         }
685       }
686     }
687   }
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
693 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
694 {
695   int          ierr;
696   PetscReal    xl,yl,xr,yr,w,h;
697   PetscDraw    draw;
698   PetscTruth   isnull;
699 
700   PetscFunctionBegin;
701 
702   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
703   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
704 
705   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
706   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
707   xr += w;    yr += h;  xl = -w;     yl = -h;
708   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
709   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
710   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
711   PetscFunctionReturn(0);
712 }
713 
714 #undef __FUNCT__
715 #define __FUNCT__ "MatView_SeqBAIJ"
716 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
717 {
718   int        ierr;
719   PetscTruth issocket,isascii,isbinary,isdraw;
720 
721   PetscFunctionBegin;
722   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
723   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
724   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
725   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
726   if (issocket) {
727     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
728   } else if (isascii){
729     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
730   } else if (isbinary) {
731     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
732   } else if (isdraw) {
733     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
734   } else {
735     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "MatGetValues_SeqBAIJ"
743 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
744 {
745   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
746   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
747   int        *ai = a->i,*ailen = a->ilen;
748   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
749   MatScalar  *ap,*aa = a->a,zero = 0.0;
750 
751   PetscFunctionBegin;
752   for (k=0; k<m; k++) { /* loop over rows */
753     row  = im[k]; brow = row/bs;
754     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
755     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row);
756     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
757     nrow = ailen[brow];
758     for (l=0; l<n; l++) { /* loop over columns */
759       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
760       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]);
761       col  = in[l] ;
762       bcol = col/bs;
763       cidx = col%bs;
764       ridx = row%bs;
765       high = nrow;
766       low  = 0; /* assume unsorted */
767       while (high-low > 5) {
768         t = (low+high)/2;
769         if (rp[t] > bcol) high = t;
770         else             low  = t;
771       }
772       for (i=low; i<high; i++) {
773         if (rp[i] > bcol) break;
774         if (rp[i] == bcol) {
775           *v++ = ap[bs2*i+bs*cidx+ridx];
776           goto finished;
777         }
778       }
779       *v++ = zero;
780       finished:;
781     }
782   }
783   PetscFunctionReturn(0);
784 }
785 
786 #if defined(PETSC_USE_MAT_SINGLE)
787 #undef __FUNCT__
788 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
789 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
790 {
791   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
792   int         ierr,i,N = m*n*b->bs2;
793   MatScalar   *vsingle;
794 
795   PetscFunctionBegin;
796   if (N > b->setvalueslen) {
797     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
798     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
799     b->setvalueslen  = N;
800   }
801   vsingle = b->setvaluescopy;
802   for (i=0; i<N; i++) {
803     vsingle[i] = v[i];
804   }
805   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
806   PetscFunctionReturn(0);
807 }
808 #endif
809 
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
813 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is)
814 {
815   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
816   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
817   int               *imax=a->imax,*ai=a->i,*ailen=a->ilen;
818   int               *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
819   PetscTruth        roworiented=a->roworiented;
820   const MatScalar   *value = v;
821   MatScalar         *ap,*aa = a->a,*bap;
822 
823   PetscFunctionBegin;
824   if (roworiented) {
825     stepval = (n-1)*bs;
826   } else {
827     stepval = (m-1)*bs;
828   }
829   for (k=0; k<m; k++) { /* loop over added rows */
830     row  = im[k];
831     if (row < 0) continue;
832 #if defined(PETSC_USE_BOPT_g)
833     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
834 #endif
835     rp   = aj + ai[row];
836     ap   = aa + bs2*ai[row];
837     rmax = imax[row];
838     nrow = ailen[row];
839     low  = 0;
840     for (l=0; l<n; l++) { /* loop over added columns */
841       if (in[l] < 0) continue;
842 #if defined(PETSC_USE_BOPT_g)
843       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1);
844 #endif
845       col = in[l];
846       if (roworiented) {
847         value = v + k*(stepval+bs)*bs + l*bs;
848       } else {
849         value = v + l*(stepval+bs)*bs + k*bs;
850       }
851       if (!sorted) low = 0; high = nrow;
852       while (high-low > 7) {
853         t = (low+high)/2;
854         if (rp[t] > col) high = t;
855         else             low  = t;
856       }
857       for (i=low; i<high; i++) {
858         if (rp[i] > col) break;
859         if (rp[i] == col) {
860           bap  = ap +  bs2*i;
861           if (roworiented) {
862             if (is == ADD_VALUES) {
863               for (ii=0; ii<bs; ii++,value+=stepval) {
864                 for (jj=ii; jj<bs2; jj+=bs) {
865                   bap[jj] += *value++;
866                 }
867               }
868             } else {
869               for (ii=0; ii<bs; ii++,value+=stepval) {
870                 for (jj=ii; jj<bs2; jj+=bs) {
871                   bap[jj] = *value++;
872                 }
873               }
874             }
875           } else {
876             if (is == ADD_VALUES) {
877               for (ii=0; ii<bs; ii++,value+=stepval) {
878                 for (jj=0; jj<bs; jj++) {
879                   *bap++ += *value++;
880                 }
881               }
882             } else {
883               for (ii=0; ii<bs; ii++,value+=stepval) {
884                 for (jj=0; jj<bs; jj++) {
885                   *bap++  = *value++;
886                 }
887               }
888             }
889           }
890           goto noinsert2;
891         }
892       }
893       if (nonew == 1) goto noinsert2;
894       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
895       if (nrow >= rmax) {
896         /* there is no extra room in row, therefore enlarge */
897         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
898         MatScalar *new_a;
899 
900         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
901 
902         /* malloc new storage space */
903         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
904 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
905         new_j   = (int*)(new_a + bs2*new_nz);
906         new_i   = new_j + new_nz;
907 
908         /* copy over old data into new slots */
909         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
910         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
911         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
912         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
913         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
914         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
915         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
916         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
917         /* free up old matrix storage */
918         ierr = PetscFree(a->a);CHKERRQ(ierr);
919         if (!a->singlemalloc) {
920           ierr = PetscFree(a->i);CHKERRQ(ierr);
921           ierr = PetscFree(a->j);CHKERRQ(ierr);
922         }
923         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
924         a->singlemalloc = PETSC_TRUE;
925 
926         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
927         rmax = imax[row] = imax[row] + CHUNKSIZE;
928         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
929         a->maxnz += bs2*CHUNKSIZE;
930         a->reallocs++;
931         a->nz++;
932       }
933       N = nrow++ - 1;
934       /* shift up all the later entries in this row */
935       for (ii=N; ii>=i; ii--) {
936         rp[ii+1] = rp[ii];
937         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
938       }
939       if (N >= i) {
940         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
941       }
942       rp[i] = col;
943       bap   = ap +  bs2*i;
944       if (roworiented) {
945         for (ii=0; ii<bs; ii++,value+=stepval) {
946           for (jj=ii; jj<bs2; jj+=bs) {
947             bap[jj] = *value++;
948           }
949         }
950       } else {
951         for (ii=0; ii<bs; ii++,value+=stepval) {
952           for (jj=0; jj<bs; jj++) {
953             *bap++  = *value++;
954           }
955         }
956       }
957       noinsert2:;
958       low = i;
959     }
960     ailen[row] = nrow;
961   }
962   PetscFunctionReturn(0);
963 }
964 
965 #undef __FUNCT__
966 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
967 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
968 {
969   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
970   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
971   int        m = A->m,*ip,N,*ailen = a->ilen;
972   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
973   MatScalar  *aa = a->a,*ap;
974 
975   PetscFunctionBegin;
976   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
977 
978   if (m) rmax = ailen[0];
979   for (i=1; i<mbs; i++) {
980     /* move each row back by the amount of empty slots (fshift) before it*/
981     fshift += imax[i-1] - ailen[i-1];
982     rmax   = PetscMax(rmax,ailen[i]);
983     if (fshift) {
984       ip = aj + ai[i]; ap = aa + bs2*ai[i];
985       N = ailen[i];
986       for (j=0; j<N; j++) {
987         ip[j-fshift] = ip[j];
988         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
989       }
990     }
991     ai[i] = ai[i-1] + ailen[i-1];
992   }
993   if (mbs) {
994     fshift += imax[mbs-1] - ailen[mbs-1];
995     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
996   }
997   /* reset ilen and imax for each row */
998   for (i=0; i<mbs; i++) {
999     ailen[i] = imax[i] = ai[i+1] - ai[i];
1000   }
1001   a->nz = ai[mbs];
1002 
1003   /* diagonals may have moved, so kill the diagonal pointers */
1004   if (fshift && a->diag) {
1005     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1006     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
1007     a->diag = 0;
1008   }
1009   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1010   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1011   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1012   a->reallocs          = 0;
1013   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1014 
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 
1019 
1020 /*
1021    This function returns an array of flags which indicate the locations of contiguous
1022    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1023    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1024    Assume: sizes should be long enough to hold all the values.
1025 */
1026 #undef __FUNCT__
1027 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1028 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1029 {
1030   int        i,j,k,row;
1031   PetscTruth flg;
1032 
1033   PetscFunctionBegin;
1034   for (i=0,j=0; i<n; j++) {
1035     row = idx[i];
1036     if (row%bs!=0) { /* Not the begining of a block */
1037       sizes[j] = 1;
1038       i++;
1039     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1040       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1041       i++;
1042     } else { /* Begining of the block, so check if the complete block exists */
1043       flg = PETSC_TRUE;
1044       for (k=1; k<bs; k++) {
1045         if (row+k != idx[i+k]) { /* break in the block */
1046           flg = PETSC_FALSE;
1047           break;
1048         }
1049       }
1050       if (flg == PETSC_TRUE) { /* No break in the bs */
1051         sizes[j] = bs;
1052         i+= bs;
1053       } else {
1054         sizes[j] = 1;
1055         i++;
1056       }
1057     }
1058   }
1059   *bs_max = j;
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 #undef __FUNCT__
1064 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1065 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1066 {
1067   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1068   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
1069   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1070   PetscScalar zero = 0.0;
1071   MatScalar   *aa;
1072 
1073   PetscFunctionBegin;
1074   /* Make a copy of the IS and  sort it */
1075   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1076   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1077 
1078   /* allocate memory for rows,sizes */
1079   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
1080   sizes = rows + is_n;
1081 
1082   /* copy IS values to rows, and sort them */
1083   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1084   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1085   if (baij->keepzeroedrows) {
1086     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1087     bs_max = is_n;
1088   } else {
1089     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1090   }
1091   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1092 
1093   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1094     row   = rows[j];
1095     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1096     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1097     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1098     if (sizes[i] == bs && !baij->keepzeroedrows) {
1099       if (diag) {
1100         if (baij->ilen[row/bs] > 0) {
1101           baij->ilen[row/bs]       = 1;
1102           baij->j[baij->i[row/bs]] = row/bs;
1103           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1104         }
1105         /* Now insert all the diagonal values for this bs */
1106         for (k=0; k<bs; k++) {
1107           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1108         }
1109       } else { /* (!diag) */
1110         baij->ilen[row/bs] = 0;
1111       } /* end (!diag) */
1112     } else { /* (sizes[i] != bs) */
1113 #if defined (PETSC_USE_DEBUG)
1114       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1115 #endif
1116       for (k=0; k<count; k++) {
1117         aa[0] =  zero;
1118         aa    += bs;
1119       }
1120       if (diag) {
1121         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1122       }
1123     }
1124   }
1125 
1126   ierr = PetscFree(rows);CHKERRQ(ierr);
1127   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 #undef __FUNCT__
1132 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1133 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
1134 {
1135   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1136   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1137   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1138   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1139   int         ridx,cidx,bs2=a->bs2,ierr;
1140   PetscTruth  roworiented=a->roworiented;
1141   MatScalar   *ap,value,*aa=a->a,*bap;
1142 
1143   PetscFunctionBegin;
1144   for (k=0; k<m; k++) { /* loop over added rows */
1145     row  = im[k]; brow = row/bs;
1146     if (row < 0) continue;
1147 #if defined(PETSC_USE_BOPT_g)
1148     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
1149 #endif
1150     rp   = aj + ai[brow];
1151     ap   = aa + bs2*ai[brow];
1152     rmax = imax[brow];
1153     nrow = ailen[brow];
1154     low  = 0;
1155     for (l=0; l<n; l++) { /* loop over added columns */
1156       if (in[l] < 0) continue;
1157 #if defined(PETSC_USE_BOPT_g)
1158       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
1159 #endif
1160       col = in[l]; bcol = col/bs;
1161       ridx = row % bs; cidx = col % bs;
1162       if (roworiented) {
1163         value = v[l + k*n];
1164       } else {
1165         value = v[k + l*m];
1166       }
1167       if (!sorted) low = 0; high = nrow;
1168       while (high-low > 7) {
1169         t = (low+high)/2;
1170         if (rp[t] > bcol) high = t;
1171         else              low  = t;
1172       }
1173       for (i=low; i<high; i++) {
1174         if (rp[i] > bcol) break;
1175         if (rp[i] == bcol) {
1176           bap  = ap +  bs2*i + bs*cidx + ridx;
1177           if (is == ADD_VALUES) *bap += value;
1178           else                  *bap  = value;
1179           goto noinsert1;
1180         }
1181       }
1182       if (nonew == 1) goto noinsert1;
1183       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1184       if (nrow >= rmax) {
1185         /* there is no extra room in row, therefore enlarge */
1186         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1187         MatScalar *new_a;
1188 
1189         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1190 
1191         /* Malloc new storage space */
1192         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1193 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1194         new_j   = (int*)(new_a + bs2*new_nz);
1195         new_i   = new_j + new_nz;
1196 
1197         /* copy over old data into new slots */
1198         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1199         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1200         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1201         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1202         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1203         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1204         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1205         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1206         /* free up old matrix storage */
1207         ierr = PetscFree(a->a);CHKERRQ(ierr);
1208         if (!a->singlemalloc) {
1209           ierr = PetscFree(a->i);CHKERRQ(ierr);
1210           ierr = PetscFree(a->j);CHKERRQ(ierr);
1211         }
1212         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1213         a->singlemalloc = PETSC_TRUE;
1214 
1215         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1216         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1217         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1218         a->maxnz += bs2*CHUNKSIZE;
1219         a->reallocs++;
1220         a->nz++;
1221       }
1222       N = nrow++ - 1;
1223       /* shift up all the later entries in this row */
1224       for (ii=N; ii>=i; ii--) {
1225         rp[ii+1] = rp[ii];
1226         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1227       }
1228       if (N>=i) {
1229         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1230       }
1231       rp[i]                      = bcol;
1232       ap[bs2*i + bs*cidx + ridx] = value;
1233       noinsert1:;
1234       low = i;
1235     }
1236     ailen[brow] = nrow;
1237   }
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1244 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1245 {
1246   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1247   Mat         outA;
1248   int         ierr;
1249   PetscTruth  row_identity,col_identity;
1250 
1251   PetscFunctionBegin;
1252   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1253   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1254   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1255   if (!row_identity || !col_identity) {
1256     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1257   }
1258 
1259   outA          = inA;
1260   inA->factor   = FACTOR_LU;
1261 
1262   if (!a->diag) {
1263     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1264   }
1265 
1266   a->row        = row;
1267   a->col        = col;
1268   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1269   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1270 
1271   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1272   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1273   PetscLogObjectParent(inA,a->icol);
1274 
1275   /*
1276       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1277       for ILU(0) factorization with natural ordering
1278   */
1279   if (a->bs < 8) {
1280     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1281   } else {
1282     if (!a->solve_work) {
1283       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1284       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1285     }
1286   }
1287 
1288   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1289 
1290   PetscFunctionReturn(0);
1291 }
1292 #undef __FUNCT__
1293 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1294 int MatPrintHelp_SeqBAIJ(Mat A)
1295 {
1296   static PetscTruth called = PETSC_FALSE;
1297   MPI_Comm          comm = A->comm;
1298   int               ierr;
1299 
1300   PetscFunctionBegin;
1301   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1302   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1303   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 EXTERN_C_BEGIN
1308 #undef __FUNCT__
1309 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1310 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1311 {
1312   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1313   int         i,nz,nbs;
1314 
1315   PetscFunctionBegin;
1316   nz  = baij->maxnz/baij->bs2;
1317   nbs = baij->nbs;
1318   for (i=0; i<nz; i++) {
1319     baij->j[i] = indices[i];
1320   }
1321   baij->nz = nz;
1322   for (i=0; i<nbs; i++) {
1323     baij->ilen[i] = baij->imax[i];
1324   }
1325 
1326   PetscFunctionReturn(0);
1327 }
1328 EXTERN_C_END
1329 
1330 #undef __FUNCT__
1331 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1332 /*@
1333     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1334        in the matrix.
1335 
1336   Input Parameters:
1337 +  mat - the SeqBAIJ matrix
1338 -  indices - the column indices
1339 
1340   Level: advanced
1341 
1342   Notes:
1343     This can be called if you have precomputed the nonzero structure of the
1344   matrix and want to provide it to the matrix object to improve the performance
1345   of the MatSetValues() operation.
1346 
1347     You MUST have set the correct numbers of nonzeros per row in the call to
1348   MatCreateSeqBAIJ().
1349 
1350     MUST be called before any calls to MatSetValues();
1351 
1352 @*/
1353 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1354 {
1355   int ierr,(*f)(Mat,int *);
1356 
1357   PetscFunctionBegin;
1358   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1359   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1360   if (f) {
1361     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1362   } else {
1363     SETERRQ(1,"Wrong type of matrix to set column indices");
1364   }
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #undef __FUNCT__
1369 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1370 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1371 {
1372   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1373   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1374   PetscReal    atmp;
1375   PetscScalar  *x,zero = 0.0;
1376   MatScalar    *aa;
1377   int          ncols,brow,krow,kcol;
1378 
1379   PetscFunctionBegin;
1380   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1381   bs   = a->bs;
1382   aa   = a->a;
1383   ai   = a->i;
1384   aj   = a->j;
1385   mbs = a->mbs;
1386 
1387   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1388   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1389   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1390   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1391   for (i=0; i<mbs; i++) {
1392     ncols = ai[1] - ai[0]; ai++;
1393     brow  = bs*i;
1394     for (j=0; j<ncols; j++){
1395       /* bcol = bs*(*aj); */
1396       for (kcol=0; kcol<bs; kcol++){
1397         for (krow=0; krow<bs; krow++){
1398           atmp = PetscAbsScalar(*aa); aa++;
1399           row = brow + krow;    /* row index */
1400           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1401           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1402         }
1403       }
1404       aj++;
1405     }
1406   }
1407   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1413 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1414 {
1415   int        ierr;
1416 
1417   PetscFunctionBegin;
1418   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1424 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1425 {
1426   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1427   PetscFunctionBegin;
1428   *array = a->a;
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1434 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1435 {
1436   PetscFunctionBegin;
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #include "petscblaslapack.h"
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1443 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1444 {
1445   Mat_SeqBAIJ  *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1446   int          ierr,one=1,i,bs=y->bs,j,bs2;
1447 
1448   PetscFunctionBegin;
1449   if (str == SAME_NONZERO_PATTERN) {
1450     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1451   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1452     if (y->xtoy && y->XtoY != X) {
1453       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1454       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1455     }
1456     if (!y->xtoy) { /* get xtoy */
1457       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1458       y->XtoY = X;
1459     }
1460     bs2 = bs*bs;
1461     for (i=0; i<x->nz; i++) {
1462       j = 0;
1463       while (j < bs2){
1464         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1465         j++;
1466       }
1467     }
1468     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1469   } else {
1470     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1471   }
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 /* -------------------------------------------------------------------*/
1476 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1477        MatGetRow_SeqBAIJ,
1478        MatRestoreRow_SeqBAIJ,
1479        MatMult_SeqBAIJ_N,
1480 /* 4*/ MatMultAdd_SeqBAIJ_N,
1481        MatMultTranspose_SeqBAIJ,
1482        MatMultTransposeAdd_SeqBAIJ,
1483        MatSolve_SeqBAIJ_N,
1484        0,
1485        0,
1486 /*10*/ 0,
1487        MatLUFactor_SeqBAIJ,
1488        0,
1489        0,
1490        MatTranspose_SeqBAIJ,
1491 /*15*/ MatGetInfo_SeqBAIJ,
1492        MatEqual_SeqBAIJ,
1493        MatGetDiagonal_SeqBAIJ,
1494        MatDiagonalScale_SeqBAIJ,
1495        MatNorm_SeqBAIJ,
1496 /*20*/ 0,
1497        MatAssemblyEnd_SeqBAIJ,
1498        0,
1499        MatSetOption_SeqBAIJ,
1500        MatZeroEntries_SeqBAIJ,
1501 /*25*/ MatZeroRows_SeqBAIJ,
1502        MatLUFactorSymbolic_SeqBAIJ,
1503        MatLUFactorNumeric_SeqBAIJ_N,
1504        0,
1505        0,
1506 /*30*/ MatSetUpPreallocation_SeqBAIJ,
1507        MatILUFactorSymbolic_SeqBAIJ,
1508        0,
1509        MatGetArray_SeqBAIJ,
1510        MatRestoreArray_SeqBAIJ,
1511 /*35*/ MatDuplicate_SeqBAIJ,
1512        0,
1513        0,
1514        MatILUFactor_SeqBAIJ,
1515        0,
1516 /*40*/ MatAXPY_SeqBAIJ,
1517        MatGetSubMatrices_SeqBAIJ,
1518        MatIncreaseOverlap_SeqBAIJ,
1519        MatGetValues_SeqBAIJ,
1520        0,
1521 /*45*/ MatPrintHelp_SeqBAIJ,
1522        MatScale_SeqBAIJ,
1523        0,
1524        0,
1525        0,
1526 /*50*/ MatGetBlockSize_SeqBAIJ,
1527        MatGetRowIJ_SeqBAIJ,
1528        MatRestoreRowIJ_SeqBAIJ,
1529        0,
1530        0,
1531 /*55*/ 0,
1532        0,
1533        0,
1534        0,
1535        MatSetValuesBlocked_SeqBAIJ,
1536 /*60*/ MatGetSubMatrix_SeqBAIJ,
1537        MatDestroy_SeqBAIJ,
1538        MatView_SeqBAIJ,
1539        MatGetPetscMaps_Petsc,
1540        0,
1541 /*65*/ 0,
1542        0,
1543        0,
1544        0,
1545        0,
1546 /*70*/ MatGetRowMax_SeqBAIJ,
1547        MatConvert_Basic,
1548        0,
1549        0,
1550        0,
1551 /*75*/ 0,
1552        0,
1553        0,
1554        0,
1555        0,
1556 /*80*/ 0,
1557        0,
1558        0,
1559        0,
1560 /*85*/ MatLoad_SeqBAIJ
1561 };
1562 
1563 EXTERN_C_BEGIN
1564 #undef __FUNCT__
1565 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1566 int MatStoreValues_SeqBAIJ(Mat mat)
1567 {
1568   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1569   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1570   int         ierr;
1571 
1572   PetscFunctionBegin;
1573   if (aij->nonew != 1) {
1574     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1575   }
1576 
1577   /* allocate space for values if not already there */
1578   if (!aij->saved_values) {
1579     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1580   }
1581 
1582   /* copy values over */
1583   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1584   PetscFunctionReturn(0);
1585 }
1586 EXTERN_C_END
1587 
1588 EXTERN_C_BEGIN
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1591 int MatRetrieveValues_SeqBAIJ(Mat mat)
1592 {
1593   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1594   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1595 
1596   PetscFunctionBegin;
1597   if (aij->nonew != 1) {
1598     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1599   }
1600   if (!aij->saved_values) {
1601     SETERRQ(1,"Must call MatStoreValues(A);first");
1602   }
1603 
1604   /* copy values over */
1605   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1606   PetscFunctionReturn(0);
1607 }
1608 EXTERN_C_END
1609 
1610 EXTERN_C_BEGIN
1611 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*);
1612 EXTERN_C_END
1613 
1614 EXTERN_C_BEGIN
1615 #undef __FUNCT__
1616 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
1617 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz)
1618 {
1619   Mat_SeqBAIJ *b;
1620   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1621   PetscTruth  flg;
1622 
1623   PetscFunctionBegin;
1624 
1625   B->preallocated = PETSC_TRUE;
1626   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1627   if (nnz && newbs != bs) {
1628     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1629   }
1630   bs   = newbs;
1631 
1632   mbs  = B->m/bs;
1633   nbs  = B->n/bs;
1634   bs2  = bs*bs;
1635 
1636   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1637     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1638   }
1639 
1640   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1641   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1642   if (nnz) {
1643     for (i=0; i<mbs; i++) {
1644       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1645       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1646     }
1647   }
1648 
1649   b       = (Mat_SeqBAIJ*)B->data;
1650   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1651   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1652   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1653   if (!flg) {
1654     switch (bs) {
1655     case 1:
1656       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1657       B->ops->mult            = MatMult_SeqBAIJ_1;
1658       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1659       break;
1660     case 2:
1661       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1662       B->ops->mult            = MatMult_SeqBAIJ_2;
1663       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1664       break;
1665     case 3:
1666       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1667       B->ops->mult            = MatMult_SeqBAIJ_3;
1668       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1669       break;
1670     case 4:
1671       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1672       B->ops->mult            = MatMult_SeqBAIJ_4;
1673       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1674       break;
1675     case 5:
1676       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1677       B->ops->mult            = MatMult_SeqBAIJ_5;
1678       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1679       break;
1680     case 6:
1681       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1682       B->ops->mult            = MatMult_SeqBAIJ_6;
1683       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1684       break;
1685     case 7:
1686       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1687       B->ops->mult            = MatMult_SeqBAIJ_7;
1688       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1689       break;
1690     default:
1691       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1692       B->ops->mult            = MatMult_SeqBAIJ_N;
1693       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1694       break;
1695     }
1696   }
1697   b->bs      = bs;
1698   b->mbs     = mbs;
1699   b->nbs     = nbs;
1700   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1701   if (!nnz) {
1702     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1703     else if (nz <= 0)        nz = 1;
1704     for (i=0; i<mbs; i++) b->imax[i] = nz;
1705     nz = nz*mbs;
1706   } else {
1707     nz = 0;
1708     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1709   }
1710 
1711   /* allocate the matrix space */
1712   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1713   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1714   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1715   b->j  = (int*)(b->a + nz*bs2);
1716   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1717   b->i  = b->j + nz;
1718   b->singlemalloc = PETSC_TRUE;
1719 
1720   b->i[0] = 0;
1721   for (i=1; i<mbs+1; i++) {
1722     b->i[i] = b->i[i-1] + b->imax[i-1];
1723   }
1724 
1725   /* b->ilen will count nonzeros in each block row so far. */
1726   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1727   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1728   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1729 
1730   b->bs               = bs;
1731   b->bs2              = bs2;
1732   b->mbs              = mbs;
1733   b->nz               = 0;
1734   b->maxnz            = nz*bs2;
1735   B->info.nz_unneeded = (PetscReal)b->maxnz;
1736   PetscFunctionReturn(0);
1737 }
1738 EXTERN_C_END
1739 
1740 /*MC
1741    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
1742    block sparse compressed row format.
1743 
1744    Options Database Keys:
1745 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
1746 
1747   Level: beginner
1748 
1749 .seealso: MatCreateSeqBAIJ
1750 M*/
1751 
1752 EXTERN_C_BEGIN
1753 #undef __FUNCT__
1754 #define __FUNCT__ "MatCreate_SeqBAIJ"
1755 int MatCreate_SeqBAIJ(Mat B)
1756 {
1757   int         ierr,size;
1758   Mat_SeqBAIJ *b;
1759 
1760   PetscFunctionBegin;
1761   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1762   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1763 
1764   B->m = B->M = PetscMax(B->m,B->M);
1765   B->n = B->N = PetscMax(B->n,B->N);
1766   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1767   B->data = (void*)b;
1768   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1769   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1770   B->factor           = 0;
1771   B->lupivotthreshold = 1.0;
1772   B->mapping          = 0;
1773   b->row              = 0;
1774   b->col              = 0;
1775   b->icol             = 0;
1776   b->reallocs         = 0;
1777   b->saved_values     = 0;
1778 #if defined(PETSC_USE_MAT_SINGLE)
1779   b->setvalueslen     = 0;
1780   b->setvaluescopy    = PETSC_NULL;
1781 #endif
1782 
1783   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1784   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1785 
1786   b->sorted           = PETSC_FALSE;
1787   b->roworiented      = PETSC_TRUE;
1788   b->nonew            = 0;
1789   b->diag             = 0;
1790   b->solve_work       = 0;
1791   b->mult_work        = 0;
1792   B->spptr            = 0;
1793   B->info.nz_unneeded = (PetscReal)b->maxnz;
1794   b->keepzeroedrows   = PETSC_FALSE;
1795   b->xtoy              = 0;
1796   b->XtoY              = 0;
1797 
1798   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1799                                      "MatStoreValues_SeqBAIJ",
1800                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1801   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1802                                      "MatRetrieveValues_SeqBAIJ",
1803                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1804   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1805                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1806                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1807   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
1808                                      "MatConvert_SeqBAIJ_SeqAIJ",
1809                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1810   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
1811                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
1812                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
1813   PetscFunctionReturn(0);
1814 }
1815 EXTERN_C_END
1816 
1817 #undef __FUNCT__
1818 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1819 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1820 {
1821   Mat         C;
1822   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1823   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1824 
1825   PetscFunctionBegin;
1826   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1827 
1828   *B = 0;
1829   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1830   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1831   c    = (Mat_SeqBAIJ*)C->data;
1832 
1833   c->bs         = a->bs;
1834   c->bs2        = a->bs2;
1835   c->mbs        = a->mbs;
1836   c->nbs        = a->nbs;
1837   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1838 
1839   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1840   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1841   for (i=0; i<mbs; i++) {
1842     c->imax[i] = a->imax[i];
1843     c->ilen[i] = a->ilen[i];
1844   }
1845 
1846   /* allocate the matrix space */
1847   c->singlemalloc = PETSC_TRUE;
1848   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1849   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1850   c->j = (int*)(c->a + nz*bs2);
1851   c->i = c->j + nz;
1852   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1853   if (mbs > 0) {
1854     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1855     if (cpvalues == MAT_COPY_VALUES) {
1856       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1857     } else {
1858       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1859     }
1860   }
1861 
1862   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1863   c->sorted      = a->sorted;
1864   c->roworiented = a->roworiented;
1865   c->nonew       = a->nonew;
1866 
1867   if (a->diag) {
1868     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1869     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1870     for (i=0; i<mbs; i++) {
1871       c->diag[i] = a->diag[i];
1872     }
1873   } else c->diag        = 0;
1874   c->nz                 = a->nz;
1875   c->maxnz              = a->maxnz;
1876   c->solve_work         = 0;
1877   C->spptr              = 0;     /* Dangerous -I'm throwing away a->spptr */
1878   c->mult_work          = 0;
1879   C->preallocated       = PETSC_TRUE;
1880   C->assembled          = PETSC_TRUE;
1881   *B = C;
1882   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatLoad_SeqBAIJ"
1888 int MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1889 {
1890   Mat_SeqBAIJ  *a;
1891   Mat          B;
1892   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1893   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1894   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1895   int          *masked,nmask,tmp,bs2,ishift;
1896   PetscScalar  *aa;
1897   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1898 
1899   PetscFunctionBegin;
1900   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1901   bs2  = bs*bs;
1902 
1903   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1904   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1905   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1906   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1907   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1908   M = header[1]; N = header[2]; nz = header[3];
1909 
1910   if (header[3] < 0) {
1911     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1912   }
1913 
1914   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1915 
1916   /*
1917      This code adds extra rows to make sure the number of rows is
1918     divisible by the blocksize
1919   */
1920   mbs        = M/bs;
1921   extra_rows = bs - M + bs*(mbs);
1922   if (extra_rows == bs) extra_rows = 0;
1923   else                  mbs++;
1924   if (extra_rows) {
1925     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1926   }
1927 
1928   /* read in row lengths */
1929   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1930   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1931   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1932 
1933   /* read in column indices */
1934   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1935   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1936   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1937 
1938   /* loop over row lengths determining block row lengths */
1939   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1940   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1941   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1942   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1943   masked   = mask + mbs;
1944   rowcount = 0; nzcount = 0;
1945   for (i=0; i<mbs; i++) {
1946     nmask = 0;
1947     for (j=0; j<bs; j++) {
1948       kmax = rowlengths[rowcount];
1949       for (k=0; k<kmax; k++) {
1950         tmp = jj[nzcount++]/bs;
1951         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1952       }
1953       rowcount++;
1954     }
1955     browlengths[i] += nmask;
1956     /* zero out the mask elements we set */
1957     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1958   }
1959 
1960   /* create our matrix */
1961   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
1962   ierr = MatSetType(B,type);CHKERRQ(ierr);
1963   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
1964   a = (Mat_SeqBAIJ*)B->data;
1965 
1966   /* set matrix "i" values */
1967   a->i[0] = 0;
1968   for (i=1; i<= mbs; i++) {
1969     a->i[i]      = a->i[i-1] + browlengths[i-1];
1970     a->ilen[i-1] = browlengths[i-1];
1971   }
1972   a->nz         = 0;
1973   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1974 
1975   /* read in nonzero values */
1976   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1977   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1978   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1979 
1980   /* set "a" and "j" values into matrix */
1981   nzcount = 0; jcount = 0;
1982   for (i=0; i<mbs; i++) {
1983     nzcountb = nzcount;
1984     nmask    = 0;
1985     for (j=0; j<bs; j++) {
1986       kmax = rowlengths[i*bs+j];
1987       for (k=0; k<kmax; k++) {
1988         tmp = jj[nzcount++]/bs;
1989 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1990       }
1991     }
1992     /* sort the masked values */
1993     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1994 
1995     /* set "j" values into matrix */
1996     maskcount = 1;
1997     for (j=0; j<nmask; j++) {
1998       a->j[jcount++]  = masked[j];
1999       mask[masked[j]] = maskcount++;
2000     }
2001     /* set "a" values into matrix */
2002     ishift = bs2*a->i[i];
2003     for (j=0; j<bs; j++) {
2004       kmax = rowlengths[i*bs+j];
2005       for (k=0; k<kmax; k++) {
2006         tmp       = jj[nzcountb]/bs ;
2007         block     = mask[tmp] - 1;
2008         point     = jj[nzcountb] - bs*tmp;
2009         idx       = ishift + bs2*block + j + bs*point;
2010         a->a[idx] = (MatScalar)aa[nzcountb++];
2011       }
2012     }
2013     /* zero out the mask elements we set */
2014     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2015   }
2016   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2017 
2018   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2019   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2020   ierr = PetscFree(aa);CHKERRQ(ierr);
2021   ierr = PetscFree(jj);CHKERRQ(ierr);
2022   ierr = PetscFree(mask);CHKERRQ(ierr);
2023 
2024   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2025   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2026   ierr = MatView_Private(B);CHKERRQ(ierr);
2027 
2028   *A = B;
2029   PetscFunctionReturn(0);
2030 }
2031 
2032 #undef __FUNCT__
2033 #define __FUNCT__ "MatCreateSeqBAIJ"
2034 /*@C
2035    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2036    compressed row) format.  For good matrix assembly performance the
2037    user should preallocate the matrix storage by setting the parameter nz
2038    (or the array nnz).  By setting these parameters accurately, performance
2039    during matrix assembly can be increased by more than a factor of 50.
2040 
2041    Collective on MPI_Comm
2042 
2043    Input Parameters:
2044 +  comm - MPI communicator, set to PETSC_COMM_SELF
2045 .  bs - size of block
2046 .  m - number of rows
2047 .  n - number of columns
2048 .  nz - number of nonzero blocks  per block row (same for all rows)
2049 -  nnz - array containing the number of nonzero blocks in the various block rows
2050          (possibly different for each block row) or PETSC_NULL
2051 
2052    Output Parameter:
2053 .  A - the matrix
2054 
2055    Options Database Keys:
2056 .   -mat_no_unroll - uses code that does not unroll the loops in the
2057                      block calculations (much slower)
2058 .    -mat_block_size - size of the blocks to use
2059 
2060    Level: intermediate
2061 
2062    Notes:
2063    A nonzero block is any block that as 1 or more nonzeros in it
2064 
2065    The block AIJ format is fully compatible with standard Fortran 77
2066    storage.  That is, the stored row and column indices can begin at
2067    either one (as in Fortran) or zero.  See the users' manual for details.
2068 
2069    Specify the preallocated storage with either nz or nnz (not both).
2070    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2071    allocation.  For additional details, see the users manual chapter on
2072    matrices.
2073 
2074 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2075 @*/
2076 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
2077 {
2078   int ierr;
2079 
2080   PetscFunctionBegin;
2081   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2082   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2083   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 #undef __FUNCT__
2088 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2089 /*@C
2090    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2091    per row in the matrix. For good matrix assembly performance the
2092    user should preallocate the matrix storage by setting the parameter nz
2093    (or the array nnz).  By setting these parameters accurately, performance
2094    during matrix assembly can be increased by more than a factor of 50.
2095 
2096    Collective on MPI_Comm
2097 
2098    Input Parameters:
2099 +  A - the matrix
2100 .  bs - size of block
2101 .  nz - number of block nonzeros per block row (same for all rows)
2102 -  nnz - array containing the number of block nonzeros in the various block rows
2103          (possibly different for each block row) or PETSC_NULL
2104 
2105    Options Database Keys:
2106 .   -mat_no_unroll - uses code that does not unroll the loops in the
2107                      block calculations (much slower)
2108 .    -mat_block_size - size of the blocks to use
2109 
2110    Level: intermediate
2111 
2112    Notes:
2113    The block AIJ format is fully compatible with standard Fortran 77
2114    storage.  That is, the stored row and column indices can begin at
2115    either one (as in Fortran) or zero.  See the users' manual for details.
2116 
2117    Specify the preallocated storage with either nz or nnz (not both).
2118    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2119    allocation.  For additional details, see the users manual chapter on
2120    matrices.
2121 
2122 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2123 @*/
2124 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[])
2125 {
2126   int ierr,(*f)(Mat,int,int,const int[]);
2127 
2128   PetscFunctionBegin;
2129   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2130   if (f) {
2131     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2132   }
2133   PetscFunctionReturn(0);
2134 }
2135