xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 618c6428babd1cdfc19b0f3e34ac03276908e50a)
1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "petscsys.h"                     /*I "petscmat.h" I*/
11 
12 /*
13     Special version for Fun3d sequential benchmark
14 */
15 #if defined(PETSC_HAVE_FORTRAN_CAPS)
16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
18 #define matsetvaluesblocked4_ matsetvaluesblocked4
19 #endif
20 
21 EXTERN_C_BEGIN
22 #undef __FUNCT__
23 #define __FUNCT__ "matsetvaluesblocked4_"
24 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[])
25 {
26   Mat               A = *AA;
27   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
28   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
29   int               *ai=a->i,*ailen=a->ilen;
30   int               *aj=a->j,stepval;
31   const PetscScalar *value = v;
32   MatScalar         *ap,*aa = a->a,*bap;
33 
34   PetscFunctionBegin;
35   stepval = (n-1)*4;
36   for (k=0; k<m; k++) { /* loop over added rows */
37     row  = im[k];
38     rp   = aj + ai[row];
39     ap   = aa + 16*ai[row];
40     nrow = ailen[row];
41     low  = 0;
42     for (l=0; l<n; l++) { /* loop over added columns */
43       col = in[l];
44       value = v + k*(stepval+4)*4 + l*4;
45       low = 0; high = nrow;
46       while (high-low > 7) {
47         t = (low+high)/2;
48         if (rp[t] > col) high = t;
49         else             low  = t;
50       }
51       for (i=low; i<high; i++) {
52         if (rp[i] > col) break;
53         if (rp[i] == col) {
54           bap  = ap +  16*i;
55           for (ii=0; ii<4; ii++,value+=stepval) {
56             for (jj=ii; jj<16; jj+=4) {
57               bap[jj] += *value++;
58             }
59           }
60           goto noinsert2;
61         }
62       }
63       N = nrow++ - 1;
64       /* shift up all the later entries in this row */
65       for (ii=N; ii>=i; ii--) {
66         rp[ii+1] = rp[ii];
67         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
68       }
69       if (N >= i) {
70         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
71       }
72       rp[i] = col;
73       bap   = ap +  16*i;
74       for (ii=0; ii<4; ii++,value+=stepval) {
75         for (jj=ii; jj<16; jj+=4) {
76           bap[jj] = *value++;
77         }
78       }
79       noinsert2:;
80       low = i;
81     }
82     ailen[row] = nrow;
83   }
84 }
85 EXTERN_C_END
86 
87 #if defined(PETSC_HAVE_FORTRAN_CAPS)
88 #define matsetvalues4_ MATSETVALUES4
89 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
90 #define matsetvalues4_ matsetvalues4
91 #endif
92 
93 EXTERN_C_BEGIN
94 #undef __FUNCT__
95 #define __FUNCT__ "MatSetValues4_"
96 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
97 {
98   Mat         A = *AA;
99   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
100   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
101   int         *ai=a->i,*ailen=a->ilen;
102   int         *aj=a->j,brow,bcol;
103   int         ridx,cidx;
104   MatScalar   *ap,value,*aa=a->a,*bap;
105 
106   PetscFunctionBegin;
107   for (k=0; k<m; k++) { /* loop over added rows */
108     row  = im[k]; brow = row/4;
109     rp   = aj + ai[brow];
110     ap   = aa + 16*ai[brow];
111     nrow = ailen[brow];
112     low  = 0;
113     for (l=0; l<n; l++) { /* loop over added columns */
114       col = in[l]; bcol = col/4;
115       ridx = row % 4; cidx = col % 4;
116       value = v[l + k*n];
117       low = 0; high = nrow;
118       while (high-low > 7) {
119         t = (low+high)/2;
120         if (rp[t] > bcol) high = t;
121         else              low  = t;
122       }
123       for (i=low; i<high; i++) {
124         if (rp[i] > bcol) break;
125         if (rp[i] == bcol) {
126           bap  = ap +  16*i + 4*cidx + ridx;
127           *bap += value;
128           goto noinsert1;
129         }
130       }
131       N = nrow++ - 1;
132       /* shift up all the later entries in this row */
133       for (ii=N; ii>=i; ii--) {
134         rp[ii+1] = rp[ii];
135         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
136       }
137       if (N>=i) {
138         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
139       }
140       rp[i]                    = bcol;
141       ap[16*i + 4*cidx + ridx] = value;
142       noinsert1:;
143       low = i;
144     }
145     ailen[brow] = nrow;
146   }
147 }
148 EXTERN_C_END
149 
150 /*  UGLY, ugly, ugly
151    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
152    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
153    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
154    converts the entries into single precision and then calls ..._MatScalar() to put them
155    into the single precision data structures.
156 */
157 #if defined(PETSC_USE_MAT_SINGLE)
158 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
159 #else
160 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
161 #endif
162 
163 #define CHUNKSIZE  10
164 
165 /*
166      Checks for missing diagonals
167 */
168 #undef __FUNCT__
169 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
170 int MatMissingDiagonal_SeqBAIJ(Mat A)
171 {
172   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
173   int         *diag,*jj = a->j,i,ierr;
174 
175   PetscFunctionBegin;
176   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
177   diag = a->diag;
178   for (i=0; i<a->mbs; i++) {
179     if (jj[diag[i]] != i) {
180       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
181     }
182   }
183   PetscFunctionReturn(0);
184 }
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
188 int MatMarkDiagonal_SeqBAIJ(Mat A)
189 {
190   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
191   int         i,j,*diag,m = a->mbs,ierr;
192 
193   PetscFunctionBegin;
194   if (a->diag) PetscFunctionReturn(0);
195 
196   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
197   PetscLogObjectMemory(A,(m+1)*sizeof(int));
198   for (i=0; i<m; i++) {
199     diag[i] = a->i[i+1];
200     for (j=a->i[i]; j<a->i[i+1]; j++) {
201       if (a->j[j] == i) {
202         diag[i] = j;
203         break;
204       }
205     }
206   }
207   a->diag = diag;
208   PetscFunctionReturn(0);
209 }
210 
211 
212 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
216 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
217 {
218   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
219   int         ierr,n = a->mbs,i;
220 
221   PetscFunctionBegin;
222   *nn = n;
223   if (!ia) PetscFunctionReturn(0);
224   if (symmetric) {
225     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
226   } else if (oshift == 1) {
227     /* temporarily add 1 to i and j indices */
228     int nz = a->i[n];
229     for (i=0; i<nz; i++) a->j[i]++;
230     for (i=0; i<n+1; i++) a->i[i]++;
231     *ia = a->i; *ja = a->j;
232   } else {
233     *ia = a->i; *ja = a->j;
234   }
235 
236   PetscFunctionReturn(0);
237 }
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
241 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
242 {
243   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
244   int         i,n = a->mbs,ierr;
245 
246   PetscFunctionBegin;
247   if (!ia) PetscFunctionReturn(0);
248   if (symmetric) {
249     ierr = PetscFree(*ia);CHKERRQ(ierr);
250     ierr = PetscFree(*ja);CHKERRQ(ierr);
251   } else if (oshift == 1) {
252     int nz = a->i[n]-1;
253     for (i=0; i<nz; i++) a->j[i]--;
254     for (i=0; i<n+1; i++) a->i[i]--;
255   }
256   PetscFunctionReturn(0);
257 }
258 
259 #undef __FUNCT__
260 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
261 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
262 {
263   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
264 
265   PetscFunctionBegin;
266   *bs = baij->bs;
267   PetscFunctionReturn(0);
268 }
269 
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatDestroy_SeqBAIJ"
273 int MatDestroy_SeqBAIJ(Mat A)
274 {
275   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
276   int         ierr;
277 
278   PetscFunctionBegin;
279 #if defined(PETSC_USE_LOG)
280   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
281 #endif
282   ierr = PetscFree(a->a);CHKERRQ(ierr);
283   if (!a->singlemalloc) {
284     ierr = PetscFree(a->i);CHKERRQ(ierr);
285     ierr = PetscFree(a->j);CHKERRQ(ierr);
286   }
287   if (a->row) {
288     ierr = ISDestroy(a->row);CHKERRQ(ierr);
289   }
290   if (a->col) {
291     ierr = ISDestroy(a->col);CHKERRQ(ierr);
292   }
293   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
294   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
295   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
296   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
297   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
298   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
299   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
300 #if defined(PETSC_USE_MAT_SINGLE)
301   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
302 #endif
303   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
304 
305   ierr = PetscFree(a);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatSetOption_SeqBAIJ"
311 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
312 {
313   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
314 
315   PetscFunctionBegin;
316   switch (op) {
317   case MAT_ROW_ORIENTED:
318     a->roworiented    = PETSC_TRUE;
319     break;
320   case MAT_COLUMN_ORIENTED:
321     a->roworiented    = PETSC_FALSE;
322     break;
323   case MAT_COLUMNS_SORTED:
324     a->sorted         = PETSC_TRUE;
325     break;
326   case MAT_COLUMNS_UNSORTED:
327     a->sorted         = PETSC_FALSE;
328     break;
329   case MAT_KEEP_ZEROED_ROWS:
330     a->keepzeroedrows = PETSC_TRUE;
331     break;
332   case MAT_NO_NEW_NONZERO_LOCATIONS:
333     a->nonew          = 1;
334     break;
335   case MAT_NEW_NONZERO_LOCATION_ERR:
336     a->nonew          = -1;
337     break;
338   case MAT_NEW_NONZERO_ALLOCATION_ERR:
339     a->nonew          = -2;
340     break;
341   case MAT_YES_NEW_NONZERO_LOCATIONS:
342     a->nonew          = 0;
343     break;
344   case MAT_ROWS_SORTED:
345   case MAT_ROWS_UNSORTED:
346   case MAT_YES_NEW_DIAGONALS:
347   case MAT_IGNORE_OFF_PROC_ENTRIES:
348   case MAT_USE_HASH_TABLE:
349     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
350     break;
351   case MAT_NO_NEW_DIAGONALS:
352     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
353   default:
354     SETERRQ(PETSC_ERR_SUP,"unknown option");
355   }
356   PetscFunctionReturn(0);
357 }
358 
359 #undef __FUNCT__
360 #define __FUNCT__ "MatGetRow_SeqBAIJ"
361 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
362 {
363   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
364   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
365   MatScalar    *aa,*aa_i;
366   PetscScalar  *v_i;
367 
368   PetscFunctionBegin;
369   bs  = a->bs;
370   ai  = a->i;
371   aj  = a->j;
372   aa  = a->a;
373   bs2 = a->bs2;
374 
375   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
376 
377   bn  = row/bs;   /* Block number */
378   bp  = row % bs; /* Block Position */
379   M   = ai[bn+1] - ai[bn];
380   *nz = bs*M;
381 
382   if (v) {
383     *v = 0;
384     if (*nz) {
385       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
386       for (i=0; i<M; i++) { /* for each block in the block row */
387         v_i  = *v + i*bs;
388         aa_i = aa + bs2*(ai[bn] + i);
389         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
390       }
391     }
392   }
393 
394   if (idx) {
395     *idx = 0;
396     if (*nz) {
397       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
398       for (i=0; i<M; i++) { /* for each block in the block row */
399         idx_i = *idx + i*bs;
400         itmp  = bs*aj[ai[bn] + i];
401         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
402       }
403     }
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
410 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
411 {
412   int ierr;
413 
414   PetscFunctionBegin;
415   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
416   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
417   PetscFunctionReturn(0);
418 }
419 
420 #undef __FUNCT__
421 #define __FUNCT__ "MatTranspose_SeqBAIJ"
422 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
423 {
424   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
425   Mat         C;
426   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
427   int         *rows,*cols,bs2=a->bs2;
428   PetscScalar *array;
429 
430   PetscFunctionBegin;
431   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
432   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
433   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
434 
435 #if defined(PETSC_USE_MAT_SINGLE)
436   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
437   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
438 #else
439   array = a->a;
440 #endif
441 
442   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
443   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
444   ierr = PetscFree(col);CHKERRQ(ierr);
445   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
446   cols = rows + bs;
447   for (i=0; i<mbs; i++) {
448     cols[0] = i*bs;
449     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
450     len = ai[i+1] - ai[i];
451     for (j=0; j<len; j++) {
452       rows[0] = (*aj++)*bs;
453       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
454       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
455       array += bs2;
456     }
457   }
458   ierr = PetscFree(rows);CHKERRQ(ierr);
459 #if defined(PETSC_USE_MAT_SINGLE)
460   ierr = PetscFree(array);
461 #endif
462 
463   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
464   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
465 
466   if (B) {
467     *B = C;
468   } else {
469     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
470   }
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
476 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
477 {
478   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
479   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
480   PetscScalar *aa;
481   FILE        *file;
482 
483   PetscFunctionBegin;
484   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
485   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
486   col_lens[0] = MAT_FILE_COOKIE;
487 
488   col_lens[1] = A->m;
489   col_lens[2] = A->n;
490   col_lens[3] = a->nz*bs2;
491 
492   /* store lengths of each row and write (including header) to file */
493   count = 0;
494   for (i=0; i<a->mbs; i++) {
495     for (j=0; j<bs; j++) {
496       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
497     }
498   }
499   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
500   ierr = PetscFree(col_lens);CHKERRQ(ierr);
501 
502   /* store column indices (zero start index) */
503   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
504   count = 0;
505   for (i=0; i<a->mbs; i++) {
506     for (j=0; j<bs; j++) {
507       for (k=a->i[i]; k<a->i[i+1]; k++) {
508         for (l=0; l<bs; l++) {
509           jj[count++] = bs*a->j[k] + l;
510         }
511       }
512     }
513   }
514   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
515   ierr = PetscFree(jj);CHKERRQ(ierr);
516 
517   /* store nonzero values */
518   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
519   count = 0;
520   for (i=0; i<a->mbs; i++) {
521     for (j=0; j<bs; j++) {
522       for (k=a->i[i]; k<a->i[i+1]; k++) {
523         for (l=0; l<bs; l++) {
524           aa[count++] = a->a[bs2*k + l*bs + j];
525         }
526       }
527     }
528   }
529   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
530   ierr = PetscFree(aa);CHKERRQ(ierr);
531 
532   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
533   if (file) {
534     fprintf(file,"-matload_block_size %d\n",a->bs);
535   }
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
541 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
542 {
543   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
544   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
545   PetscViewerFormat format;
546 
547   PetscFunctionBegin;
548   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
549   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
550     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
551   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
552     Mat aij;
553     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
554     ierr = MatView(aij,viewer);CHKERRQ(ierr);
555     ierr = MatDestroy(aij);CHKERRQ(ierr);
556   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
557      PetscFunctionReturn(0);
558   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
559     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
560     for (i=0; i<a->mbs; i++) {
561       for (j=0; j<bs; j++) {
562         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
563         for (k=a->i[i]; k<a->i[i+1]; k++) {
564           for (l=0; l<bs; l++) {
565 #if defined(PETSC_USE_COMPLEX)
566             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
567               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
568                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
569             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
570               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
571                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
572             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
573               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
574             }
575 #else
576             if (a->a[bs2*k + l*bs + j] != 0.0) {
577               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
578             }
579 #endif
580           }
581         }
582         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
583       }
584     }
585     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
586   } else {
587     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
588     for (i=0; i<a->mbs; i++) {
589       for (j=0; j<bs; j++) {
590         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
591         for (k=a->i[i]; k<a->i[i+1]; k++) {
592           for (l=0; l<bs; l++) {
593 #if defined(PETSC_USE_COMPLEX)
594             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
595               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
596                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
597             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
598               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
599                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
600             } else {
601               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
602             }
603 #else
604             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
605 #endif
606           }
607         }
608         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
609       }
610     }
611     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
612   }
613   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
614   PetscFunctionReturn(0);
615 }
616 
617 #undef __FUNCT__
618 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
619 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
620 {
621   Mat          A = (Mat) Aa;
622   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
623   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
624   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
625   MatScalar    *aa;
626   PetscViewer  viewer;
627 
628   PetscFunctionBegin;
629 
630   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
631   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
632 
633   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
634 
635   /* loop over matrix elements drawing boxes */
636   color = PETSC_DRAW_BLUE;
637   for (i=0,row=0; i<mbs; i++,row+=bs) {
638     for (j=a->i[i]; j<a->i[i+1]; j++) {
639       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
640       x_l = a->j[j]*bs; x_r = x_l + 1.0;
641       aa = a->a + j*bs2;
642       for (k=0; k<bs; k++) {
643         for (l=0; l<bs; l++) {
644           if (PetscRealPart(*aa++) >=  0.) continue;
645           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
646         }
647       }
648     }
649   }
650   color = PETSC_DRAW_CYAN;
651   for (i=0,row=0; i<mbs; i++,row+=bs) {
652     for (j=a->i[i]; j<a->i[i+1]; j++) {
653       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
654       x_l = a->j[j]*bs; x_r = x_l + 1.0;
655       aa = a->a + j*bs2;
656       for (k=0; k<bs; k++) {
657         for (l=0; l<bs; l++) {
658           if (PetscRealPart(*aa++) != 0.) continue;
659           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
660         }
661       }
662     }
663   }
664 
665   color = PETSC_DRAW_RED;
666   for (i=0,row=0; i<mbs; i++,row+=bs) {
667     for (j=a->i[i]; j<a->i[i+1]; j++) {
668       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
669       x_l = a->j[j]*bs; x_r = x_l + 1.0;
670       aa = a->a + j*bs2;
671       for (k=0; k<bs; k++) {
672         for (l=0; l<bs; l++) {
673           if (PetscRealPart(*aa++) <= 0.) continue;
674           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
675         }
676       }
677     }
678   }
679   PetscFunctionReturn(0);
680 }
681 
682 #undef __FUNCT__
683 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
684 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
685 {
686   int          ierr;
687   PetscReal    xl,yl,xr,yr,w,h;
688   PetscDraw    draw;
689   PetscTruth   isnull;
690 
691   PetscFunctionBegin;
692 
693   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
694   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
695 
696   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
697   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
698   xr += w;    yr += h;  xl = -w;     yl = -h;
699   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
700   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
701   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
702   PetscFunctionReturn(0);
703 }
704 
705 #undef __FUNCT__
706 #define __FUNCT__ "MatView_SeqBAIJ"
707 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
708 {
709   int        ierr;
710   PetscTruth issocket,isascii,isbinary,isdraw;
711 
712   PetscFunctionBegin;
713   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
714   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
715   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
716   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
717   if (issocket) {
718     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
719   } else if (isascii){
720     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
721   } else if (isbinary) {
722     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
723   } else if (isdraw) {
724     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
725   } else {
726     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
727   }
728   PetscFunctionReturn(0);
729 }
730 
731 
732 #undef __FUNCT__
733 #define __FUNCT__ "MatGetValues_SeqBAIJ"
734 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
735 {
736   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
737   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
738   int        *ai = a->i,*ailen = a->ilen;
739   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
740   MatScalar  *ap,*aa = a->a,zero = 0.0;
741 
742   PetscFunctionBegin;
743   for (k=0; k<m; k++) { /* loop over rows */
744     row  = im[k]; brow = row/bs;
745     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
746     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
747     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
748     nrow = ailen[brow];
749     for (l=0; l<n; l++) { /* loop over columns */
750       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
751       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
752       col  = in[l] ;
753       bcol = col/bs;
754       cidx = col%bs;
755       ridx = row%bs;
756       high = nrow;
757       low  = 0; /* assume unsorted */
758       while (high-low > 5) {
759         t = (low+high)/2;
760         if (rp[t] > bcol) high = t;
761         else             low  = t;
762       }
763       for (i=low; i<high; i++) {
764         if (rp[i] > bcol) break;
765         if (rp[i] == bcol) {
766           *v++ = ap[bs2*i+bs*cidx+ridx];
767           goto finished;
768         }
769       }
770       *v++ = zero;
771       finished:;
772     }
773   }
774   PetscFunctionReturn(0);
775 }
776 
777 #if defined(PETSC_USE_MAT_SINGLE)
778 #undef __FUNCT__
779 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
780 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
781 {
782   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
783   int         ierr,i,N = m*n*b->bs2;
784   MatScalar   *vsingle;
785 
786   PetscFunctionBegin;
787   if (N > b->setvalueslen) {
788     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
789     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
790     b->setvalueslen  = N;
791   }
792   vsingle = b->setvaluescopy;
793   for (i=0; i<N; i++) {
794     vsingle[i] = v[i];
795   }
796   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
797   PetscFunctionReturn(0);
798 }
799 #endif
800 
801 
802 #undef __FUNCT__
803 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
804 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is)
805 {
806   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
807   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
808   int               *imax=a->imax,*ai=a->i,*ailen=a->ilen;
809   int               *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
810   PetscTruth        roworiented=a->roworiented;
811   const MatScalar   *value = v;
812   MatScalar         *ap,*aa = a->a,*bap;
813 
814   PetscFunctionBegin;
815   if (roworiented) {
816     stepval = (n-1)*bs;
817   } else {
818     stepval = (m-1)*bs;
819   }
820   for (k=0; k<m; k++) { /* loop over added rows */
821     row  = im[k];
822     if (row < 0) continue;
823 #if defined(PETSC_USE_BOPT_g)
824     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
825 #endif
826     rp   = aj + ai[row];
827     ap   = aa + bs2*ai[row];
828     rmax = imax[row];
829     nrow = ailen[row];
830     low  = 0;
831     for (l=0; l<n; l++) { /* loop over added columns */
832       if (in[l] < 0) continue;
833 #if defined(PETSC_USE_BOPT_g)
834       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1);
835 #endif
836       col = in[l];
837       if (roworiented) {
838         value = v + k*(stepval+bs)*bs + l*bs;
839       } else {
840         value = v + l*(stepval+bs)*bs + k*bs;
841       }
842       if (!sorted) low = 0; high = nrow;
843       while (high-low > 7) {
844         t = (low+high)/2;
845         if (rp[t] > col) high = t;
846         else             low  = t;
847       }
848       for (i=low; i<high; i++) {
849         if (rp[i] > col) break;
850         if (rp[i] == col) {
851           bap  = ap +  bs2*i;
852           if (roworiented) {
853             if (is == ADD_VALUES) {
854               for (ii=0; ii<bs; ii++,value+=stepval) {
855                 for (jj=ii; jj<bs2; jj+=bs) {
856                   bap[jj] += *value++;
857                 }
858               }
859             } else {
860               for (ii=0; ii<bs; ii++,value+=stepval) {
861                 for (jj=ii; jj<bs2; jj+=bs) {
862                   bap[jj] = *value++;
863                 }
864               }
865             }
866           } else {
867             if (is == ADD_VALUES) {
868               for (ii=0; ii<bs; ii++,value+=stepval) {
869                 for (jj=0; jj<bs; jj++) {
870                   *bap++ += *value++;
871                 }
872               }
873             } else {
874               for (ii=0; ii<bs; ii++,value+=stepval) {
875                 for (jj=0; jj<bs; jj++) {
876                   *bap++  = *value++;
877                 }
878               }
879             }
880           }
881           goto noinsert2;
882         }
883       }
884       if (nonew == 1) goto noinsert2;
885       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
886       if (nrow >= rmax) {
887         /* there is no extra room in row, therefore enlarge */
888         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
889         MatScalar *new_a;
890 
891         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
892 
893         /* malloc new storage space */
894         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
895 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
896         new_j   = (int*)(new_a + bs2*new_nz);
897         new_i   = new_j + new_nz;
898 
899         /* copy over old data into new slots */
900         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
901         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
902         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
903         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
904         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
905         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
906         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
907         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
908         /* free up old matrix storage */
909         ierr = PetscFree(a->a);CHKERRQ(ierr);
910         if (!a->singlemalloc) {
911           ierr = PetscFree(a->i);CHKERRQ(ierr);
912           ierr = PetscFree(a->j);CHKERRQ(ierr);
913         }
914         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
915         a->singlemalloc = PETSC_TRUE;
916 
917         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
918         rmax = imax[row] = imax[row] + CHUNKSIZE;
919         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
920         a->maxnz += bs2*CHUNKSIZE;
921         a->reallocs++;
922         a->nz++;
923       }
924       N = nrow++ - 1;
925       /* shift up all the later entries in this row */
926       for (ii=N; ii>=i; ii--) {
927         rp[ii+1] = rp[ii];
928         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
929       }
930       if (N >= i) {
931         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
932       }
933       rp[i] = col;
934       bap   = ap +  bs2*i;
935       if (roworiented) {
936         for (ii=0; ii<bs; ii++,value+=stepval) {
937           for (jj=ii; jj<bs2; jj+=bs) {
938             bap[jj] = *value++;
939           }
940         }
941       } else {
942         for (ii=0; ii<bs; ii++,value+=stepval) {
943           for (jj=0; jj<bs; jj++) {
944             *bap++  = *value++;
945           }
946         }
947       }
948       noinsert2:;
949       low = i;
950     }
951     ailen[row] = nrow;
952   }
953   PetscFunctionReturn(0);
954 }
955 
956 #undef __FUNCT__
957 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
958 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
959 {
960   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
961   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
962   int        m = A->m,*ip,N,*ailen = a->ilen;
963   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
964   MatScalar  *aa = a->a,*ap;
965 
966   PetscFunctionBegin;
967   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
968 
969   if (m) rmax = ailen[0];
970   for (i=1; i<mbs; i++) {
971     /* move each row back by the amount of empty slots (fshift) before it*/
972     fshift += imax[i-1] - ailen[i-1];
973     rmax   = PetscMax(rmax,ailen[i]);
974     if (fshift) {
975       ip = aj + ai[i]; ap = aa + bs2*ai[i];
976       N = ailen[i];
977       for (j=0; j<N; j++) {
978         ip[j-fshift] = ip[j];
979         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
980       }
981     }
982     ai[i] = ai[i-1] + ailen[i-1];
983   }
984   if (mbs) {
985     fshift += imax[mbs-1] - ailen[mbs-1];
986     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
987   }
988   /* reset ilen and imax for each row */
989   for (i=0; i<mbs; i++) {
990     ailen[i] = imax[i] = ai[i+1] - ai[i];
991   }
992   a->nz = ai[mbs];
993 
994   /* diagonals may have moved, so kill the diagonal pointers */
995   if (fshift && a->diag) {
996     ierr = PetscFree(a->diag);CHKERRQ(ierr);
997     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
998     a->diag = 0;
999   }
1000   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1001   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1002   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1003   a->reallocs          = 0;
1004   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1005 
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 
1010 
1011 /*
1012    This function returns an array of flags which indicate the locations of contiguous
1013    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1014    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1015    Assume: sizes should be long enough to hold all the values.
1016 */
1017 #undef __FUNCT__
1018 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1019 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1020 {
1021   int        i,j,k,row;
1022   PetscTruth flg;
1023 
1024   PetscFunctionBegin;
1025   for (i=0,j=0; i<n; j++) {
1026     row = idx[i];
1027     if (row%bs!=0) { /* Not the begining of a block */
1028       sizes[j] = 1;
1029       i++;
1030     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1031       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1032       i++;
1033     } else { /* Begining of the block, so check if the complete block exists */
1034       flg = PETSC_TRUE;
1035       for (k=1; k<bs; k++) {
1036         if (row+k != idx[i+k]) { /* break in the block */
1037           flg = PETSC_FALSE;
1038           break;
1039         }
1040       }
1041       if (flg == PETSC_TRUE) { /* No break in the bs */
1042         sizes[j] = bs;
1043         i+= bs;
1044       } else {
1045         sizes[j] = 1;
1046         i++;
1047       }
1048     }
1049   }
1050   *bs_max = j;
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 #undef __FUNCT__
1055 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1056 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1057 {
1058   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1059   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
1060   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1061   PetscScalar zero = 0.0;
1062   MatScalar   *aa;
1063 
1064   PetscFunctionBegin;
1065   /* Make a copy of the IS and  sort it */
1066   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1067   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1068 
1069   /* allocate memory for rows,sizes */
1070   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
1071   sizes = rows + is_n;
1072 
1073   /* copy IS values to rows, and sort them */
1074   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1075   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1076   if (baij->keepzeroedrows) {
1077     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1078     bs_max = is_n;
1079   } else {
1080     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1081   }
1082   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1083 
1084   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1085     row   = rows[j];
1086     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1087     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1088     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1089     if (sizes[i] == bs && !baij->keepzeroedrows) {
1090       if (diag) {
1091         if (baij->ilen[row/bs] > 0) {
1092           baij->ilen[row/bs]       = 1;
1093           baij->j[baij->i[row/bs]] = row/bs;
1094           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1095         }
1096         /* Now insert all the diagonal values for this bs */
1097         for (k=0; k<bs; k++) {
1098           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1099         }
1100       } else { /* (!diag) */
1101         baij->ilen[row/bs] = 0;
1102       } /* end (!diag) */
1103     } else { /* (sizes[i] != bs) */
1104 #if defined (PETSC_USE_DEBUG)
1105       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1106 #endif
1107       for (k=0; k<count; k++) {
1108         aa[0] =  zero;
1109         aa    += bs;
1110       }
1111       if (diag) {
1112         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1113       }
1114     }
1115   }
1116 
1117   ierr = PetscFree(rows);CHKERRQ(ierr);
1118   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1124 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
1125 {
1126   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1127   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1128   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1129   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1130   int         ridx,cidx,bs2=a->bs2,ierr;
1131   PetscTruth  roworiented=a->roworiented;
1132   MatScalar   *ap,value,*aa=a->a,*bap;
1133 
1134   PetscFunctionBegin;
1135   for (k=0; k<m; k++) { /* loop over added rows */
1136     row  = im[k]; brow = row/bs;
1137     if (row < 0) continue;
1138 #if defined(PETSC_USE_BOPT_g)
1139     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
1140 #endif
1141     rp   = aj + ai[brow];
1142     ap   = aa + bs2*ai[brow];
1143     rmax = imax[brow];
1144     nrow = ailen[brow];
1145     low  = 0;
1146     for (l=0; l<n; l++) { /* loop over added columns */
1147       if (in[l] < 0) continue;
1148 #if defined(PETSC_USE_BOPT_g)
1149       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
1150 #endif
1151       col = in[l]; bcol = col/bs;
1152       ridx = row % bs; cidx = col % bs;
1153       if (roworiented) {
1154         value = v[l + k*n];
1155       } else {
1156         value = v[k + l*m];
1157       }
1158       if (!sorted) low = 0; high = nrow;
1159       while (high-low > 7) {
1160         t = (low+high)/2;
1161         if (rp[t] > bcol) high = t;
1162         else              low  = t;
1163       }
1164       for (i=low; i<high; i++) {
1165         if (rp[i] > bcol) break;
1166         if (rp[i] == bcol) {
1167           bap  = ap +  bs2*i + bs*cidx + ridx;
1168           if (is == ADD_VALUES) *bap += value;
1169           else                  *bap  = value;
1170           goto noinsert1;
1171         }
1172       }
1173       if (nonew == 1) goto noinsert1;
1174       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1175       if (nrow >= rmax) {
1176         /* there is no extra room in row, therefore enlarge */
1177         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1178         MatScalar *new_a;
1179 
1180         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1181 
1182         /* Malloc new storage space */
1183         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1184 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1185         new_j   = (int*)(new_a + bs2*new_nz);
1186         new_i   = new_j + new_nz;
1187 
1188         /* copy over old data into new slots */
1189         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1190         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1191         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1192         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1193         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1194         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1195         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1196         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1197         /* free up old matrix storage */
1198         ierr = PetscFree(a->a);CHKERRQ(ierr);
1199         if (!a->singlemalloc) {
1200           ierr = PetscFree(a->i);CHKERRQ(ierr);
1201           ierr = PetscFree(a->j);CHKERRQ(ierr);
1202         }
1203         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1204         a->singlemalloc = PETSC_TRUE;
1205 
1206         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1207         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1208         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1209         a->maxnz += bs2*CHUNKSIZE;
1210         a->reallocs++;
1211         a->nz++;
1212       }
1213       N = nrow++ - 1;
1214       /* shift up all the later entries in this row */
1215       for (ii=N; ii>=i; ii--) {
1216         rp[ii+1] = rp[ii];
1217         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1218       }
1219       if (N>=i) {
1220         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1221       }
1222       rp[i]                      = bcol;
1223       ap[bs2*i + bs*cidx + ridx] = value;
1224       noinsert1:;
1225       low = i;
1226     }
1227     ailen[brow] = nrow;
1228   }
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1235 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1236 {
1237   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1238   Mat         outA;
1239   int         ierr;
1240   PetscTruth  row_identity,col_identity;
1241 
1242   PetscFunctionBegin;
1243   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1244   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1245   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1246   if (!row_identity || !col_identity) {
1247     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1248   }
1249 
1250   outA          = inA;
1251   inA->factor   = FACTOR_LU;
1252 
1253   if (!a->diag) {
1254     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1255   }
1256 
1257   a->row        = row;
1258   a->col        = col;
1259   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1260   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1261 
1262   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1263   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1264   PetscLogObjectParent(inA,a->icol);
1265 
1266   /*
1267       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1268       for ILU(0) factorization with natural ordering
1269   */
1270   if (a->bs < 8) {
1271     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1272   } else {
1273     if (!a->solve_work) {
1274       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1275       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1276     }
1277   }
1278 
1279   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1280 
1281   PetscFunctionReturn(0);
1282 }
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1285 int MatPrintHelp_SeqBAIJ(Mat A)
1286 {
1287   static PetscTruth called = PETSC_FALSE;
1288   MPI_Comm          comm = A->comm;
1289   int               ierr;
1290 
1291   PetscFunctionBegin;
1292   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1293   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1294   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 EXTERN_C_BEGIN
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1301 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1302 {
1303   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1304   int         i,nz,nbs;
1305 
1306   PetscFunctionBegin;
1307   nz  = baij->maxnz/baij->bs2;
1308   nbs = baij->nbs;
1309   for (i=0; i<nz; i++) {
1310     baij->j[i] = indices[i];
1311   }
1312   baij->nz = nz;
1313   for (i=0; i<nbs; i++) {
1314     baij->ilen[i] = baij->imax[i];
1315   }
1316 
1317   PetscFunctionReturn(0);
1318 }
1319 EXTERN_C_END
1320 
1321 #undef __FUNCT__
1322 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1323 /*@
1324     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1325        in the matrix.
1326 
1327   Input Parameters:
1328 +  mat - the SeqBAIJ matrix
1329 -  indices - the column indices
1330 
1331   Level: advanced
1332 
1333   Notes:
1334     This can be called if you have precomputed the nonzero structure of the
1335   matrix and want to provide it to the matrix object to improve the performance
1336   of the MatSetValues() operation.
1337 
1338     You MUST have set the correct numbers of nonzeros per row in the call to
1339   MatCreateSeqBAIJ().
1340 
1341     MUST be called before any calls to MatSetValues();
1342 
1343 @*/
1344 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1345 {
1346   int ierr,(*f)(Mat,int *);
1347 
1348   PetscFunctionBegin;
1349   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1350   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1351   if (f) {
1352     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1353   } else {
1354     SETERRQ(1,"Wrong type of matrix to set column indices");
1355   }
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1361 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1362 {
1363   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1364   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1365   PetscReal    atmp;
1366   PetscScalar  *x,zero = 0.0;
1367   MatScalar    *aa;
1368   int          ncols,brow,krow,kcol;
1369 
1370   PetscFunctionBegin;
1371   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1372   bs   = a->bs;
1373   aa   = a->a;
1374   ai   = a->i;
1375   aj   = a->j;
1376   mbs = a->mbs;
1377 
1378   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1379   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1380   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1381   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1382   for (i=0; i<mbs; i++) {
1383     ncols = ai[1] - ai[0]; ai++;
1384     brow  = bs*i;
1385     for (j=0; j<ncols; j++){
1386       /* bcol = bs*(*aj); */
1387       for (kcol=0; kcol<bs; kcol++){
1388         for (krow=0; krow<bs; krow++){
1389           atmp = PetscAbsScalar(*aa); aa++;
1390           row = brow + krow;    /* row index */
1391           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1392           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1393         }
1394       }
1395       aj++;
1396     }
1397   }
1398   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 #undef __FUNCT__
1403 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1404 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1405 {
1406   int        ierr;
1407 
1408   PetscFunctionBegin;
1409   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1415 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1416 {
1417   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1418   PetscFunctionBegin;
1419   *array = a->a;
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1425 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1426 {
1427   PetscFunctionBegin;
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 #include "petscblaslapack.h"
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1434 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1435 {
1436   Mat_SeqBAIJ  *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1437   int          ierr,one=1,i,bs=y->bs,j,bs2;
1438 
1439   PetscFunctionBegin;
1440   if (str == SAME_NONZERO_PATTERN) {
1441     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1442   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1443     if (y->xtoy && y->XtoY != X) {
1444       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1445       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1446     }
1447     if (!y->xtoy) { /* get xtoy */
1448       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1449       y->XtoY = X;
1450     }
1451     bs2 = bs*bs;
1452     for (i=0; i<x->nz; i++) {
1453       j = 0;
1454       while (j < bs2){
1455         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1456         j++;
1457       }
1458     }
1459     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1460   } else {
1461     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1462   }
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 /* -------------------------------------------------------------------*/
1467 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1468        MatGetRow_SeqBAIJ,
1469        MatRestoreRow_SeqBAIJ,
1470        MatMult_SeqBAIJ_N,
1471        MatMultAdd_SeqBAIJ_N,
1472        MatMultTranspose_SeqBAIJ,
1473        MatMultTransposeAdd_SeqBAIJ,
1474        MatSolve_SeqBAIJ_N,
1475        0,
1476        0,
1477        0,
1478        MatLUFactor_SeqBAIJ,
1479        0,
1480        0,
1481        MatTranspose_SeqBAIJ,
1482        MatGetInfo_SeqBAIJ,
1483        MatEqual_SeqBAIJ,
1484        MatGetDiagonal_SeqBAIJ,
1485        MatDiagonalScale_SeqBAIJ,
1486        MatNorm_SeqBAIJ,
1487        0,
1488        MatAssemblyEnd_SeqBAIJ,
1489        0,
1490        MatSetOption_SeqBAIJ,
1491        MatZeroEntries_SeqBAIJ,
1492        MatZeroRows_SeqBAIJ,
1493        MatLUFactorSymbolic_SeqBAIJ,
1494        MatLUFactorNumeric_SeqBAIJ_N,
1495        0,
1496        0,
1497        MatSetUpPreallocation_SeqBAIJ,
1498        MatILUFactorSymbolic_SeqBAIJ,
1499        0,
1500        MatGetArray_SeqBAIJ,
1501        MatRestoreArray_SeqBAIJ,
1502        MatDuplicate_SeqBAIJ,
1503        0,
1504        0,
1505        MatILUFactor_SeqBAIJ,
1506        0,
1507        MatAXPY_SeqBAIJ,
1508        MatGetSubMatrices_SeqBAIJ,
1509        MatIncreaseOverlap_SeqBAIJ,
1510        MatGetValues_SeqBAIJ,
1511        0,
1512        MatPrintHelp_SeqBAIJ,
1513        MatScale_SeqBAIJ,
1514        0,
1515        0,
1516        0,
1517        MatGetBlockSize_SeqBAIJ,
1518        MatGetRowIJ_SeqBAIJ,
1519        MatRestoreRowIJ_SeqBAIJ,
1520        0,
1521        0,
1522        0,
1523        0,
1524        0,
1525        0,
1526        MatSetValuesBlocked_SeqBAIJ,
1527        MatGetSubMatrix_SeqBAIJ,
1528        MatDestroy_SeqBAIJ,
1529        MatView_SeqBAIJ,
1530        MatGetPetscMaps_Petsc,
1531        0,
1532        0,
1533        0,
1534        0,
1535        0,
1536        0,
1537        MatGetRowMax_SeqBAIJ,
1538        MatConvert_Basic};
1539 
1540 EXTERN_C_BEGIN
1541 #undef __FUNCT__
1542 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1543 int MatStoreValues_SeqBAIJ(Mat mat)
1544 {
1545   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1546   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1547   int         ierr;
1548 
1549   PetscFunctionBegin;
1550   if (aij->nonew != 1) {
1551     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1552   }
1553 
1554   /* allocate space for values if not already there */
1555   if (!aij->saved_values) {
1556     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1557   }
1558 
1559   /* copy values over */
1560   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1561   PetscFunctionReturn(0);
1562 }
1563 EXTERN_C_END
1564 
1565 EXTERN_C_BEGIN
1566 #undef __FUNCT__
1567 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1568 int MatRetrieveValues_SeqBAIJ(Mat mat)
1569 {
1570   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1571   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1572 
1573   PetscFunctionBegin;
1574   if (aij->nonew != 1) {
1575     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1576   }
1577   if (!aij->saved_values) {
1578     SETERRQ(1,"Must call MatStoreValues(A);first");
1579   }
1580 
1581   /* copy values over */
1582   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1583   PetscFunctionReturn(0);
1584 }
1585 EXTERN_C_END
1586 
1587 EXTERN_C_BEGIN
1588 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1589 EXTERN_C_END
1590 
1591 EXTERN_C_BEGIN
1592 #undef __FUNCT__
1593 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
1594 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz)
1595 {
1596   Mat_SeqBAIJ *b;
1597   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1598   PetscTruth  flg;
1599 
1600   PetscFunctionBegin;
1601 
1602   B->preallocated = PETSC_TRUE;
1603   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1604   if (nnz && newbs != bs) {
1605     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1606   }
1607   bs   = newbs;
1608 
1609   mbs  = B->m/bs;
1610   nbs  = B->n/bs;
1611   bs2  = bs*bs;
1612 
1613   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1614     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1615   }
1616 
1617   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1618   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1619   if (nnz) {
1620     for (i=0; i<mbs; i++) {
1621       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1622       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1623     }
1624   }
1625 
1626   b       = (Mat_SeqBAIJ*)B->data;
1627   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1628   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1629   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1630   if (!flg) {
1631     switch (bs) {
1632     case 1:
1633       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1634       B->ops->mult            = MatMult_SeqBAIJ_1;
1635       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1636       break;
1637     case 2:
1638       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1639       B->ops->mult            = MatMult_SeqBAIJ_2;
1640       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1641       break;
1642     case 3:
1643       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1644       B->ops->mult            = MatMult_SeqBAIJ_3;
1645       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1646       break;
1647     case 4:
1648       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1649       B->ops->mult            = MatMult_SeqBAIJ_4;
1650       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1651       break;
1652     case 5:
1653       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1654       B->ops->mult            = MatMult_SeqBAIJ_5;
1655       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1656       break;
1657     case 6:
1658       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1659       B->ops->mult            = MatMult_SeqBAIJ_6;
1660       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1661       break;
1662     case 7:
1663       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1664       B->ops->mult            = MatMult_SeqBAIJ_7;
1665       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1666       break;
1667     default:
1668       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1669       B->ops->mult            = MatMult_SeqBAIJ_N;
1670       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1671       break;
1672     }
1673   }
1674   b->bs      = bs;
1675   b->mbs     = mbs;
1676   b->nbs     = nbs;
1677   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1678   if (!nnz) {
1679     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1680     else if (nz <= 0)        nz = 1;
1681     for (i=0; i<mbs; i++) b->imax[i] = nz;
1682     nz = nz*mbs;
1683   } else {
1684     nz = 0;
1685     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1686   }
1687 
1688   /* allocate the matrix space */
1689   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1690   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1691   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1692   b->j  = (int*)(b->a + nz*bs2);
1693   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1694   b->i  = b->j + nz;
1695   b->singlemalloc = PETSC_TRUE;
1696 
1697   b->i[0] = 0;
1698   for (i=1; i<mbs+1; i++) {
1699     b->i[i] = b->i[i-1] + b->imax[i-1];
1700   }
1701 
1702   /* b->ilen will count nonzeros in each block row so far. */
1703   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1704   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1705   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1706 
1707   b->bs               = bs;
1708   b->bs2              = bs2;
1709   b->mbs              = mbs;
1710   b->nz               = 0;
1711   b->maxnz            = nz*bs2;
1712   B->info.nz_unneeded = (PetscReal)b->maxnz;
1713   PetscFunctionReturn(0);
1714 }
1715 EXTERN_C_END
1716 
1717 EXTERN_C_BEGIN
1718 #undef __FUNCT__
1719 #define __FUNCT__ "MatCreate_SeqBAIJ"
1720 int MatCreate_SeqBAIJ(Mat B)
1721 {
1722   int         ierr,size;
1723   Mat_SeqBAIJ *b;
1724 
1725   PetscFunctionBegin;
1726   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1727   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1728 
1729   B->m = B->M = PetscMax(B->m,B->M);
1730   B->n = B->N = PetscMax(B->n,B->N);
1731   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1732   B->data = (void*)b;
1733   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1734   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1735   B->factor           = 0;
1736   B->lupivotthreshold = 1.0;
1737   B->mapping          = 0;
1738   b->row              = 0;
1739   b->col              = 0;
1740   b->icol             = 0;
1741   b->reallocs         = 0;
1742   b->saved_values     = 0;
1743 #if defined(PETSC_USE_MAT_SINGLE)
1744   b->setvalueslen     = 0;
1745   b->setvaluescopy    = PETSC_NULL;
1746 #endif
1747 
1748   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1749   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1750 
1751   b->sorted           = PETSC_FALSE;
1752   b->roworiented      = PETSC_TRUE;
1753   b->nonew            = 0;
1754   b->diag             = 0;
1755   b->solve_work       = 0;
1756   b->mult_work        = 0;
1757   B->spptr            = 0;
1758   B->info.nz_unneeded = (PetscReal)b->maxnz;
1759   b->keepzeroedrows   = PETSC_FALSE;
1760   b->xtoy              = 0;
1761   b->XtoY              = 0;
1762 
1763   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1764                                      "MatStoreValues_SeqBAIJ",
1765                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1766   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1767                                      "MatRetrieveValues_SeqBAIJ",
1768                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1769   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1770                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1771                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1772   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
1773                                      "MatConvert_SeqBAIJ_SeqAIJ",
1774                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1775   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
1776                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
1777                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
1778   PetscFunctionReturn(0);
1779 }
1780 EXTERN_C_END
1781 
1782 #undef __FUNCT__
1783 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1784 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1785 {
1786   Mat         C;
1787   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1788   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1789 
1790   PetscFunctionBegin;
1791   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1792 
1793   *B = 0;
1794   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1795   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1796   c    = (Mat_SeqBAIJ*)C->data;
1797 
1798   c->bs         = a->bs;
1799   c->bs2        = a->bs2;
1800   c->mbs        = a->mbs;
1801   c->nbs        = a->nbs;
1802   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1803 
1804   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1805   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1806   for (i=0; i<mbs; i++) {
1807     c->imax[i] = a->imax[i];
1808     c->ilen[i] = a->ilen[i];
1809   }
1810 
1811   /* allocate the matrix space */
1812   c->singlemalloc = PETSC_TRUE;
1813   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1814   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1815   c->j = (int*)(c->a + nz*bs2);
1816   c->i = c->j + nz;
1817   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1818   if (mbs > 0) {
1819     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1820     if (cpvalues == MAT_COPY_VALUES) {
1821       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1822     } else {
1823       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1824     }
1825   }
1826 
1827   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1828   c->sorted      = a->sorted;
1829   c->roworiented = a->roworiented;
1830   c->nonew       = a->nonew;
1831 
1832   if (a->diag) {
1833     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1834     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1835     for (i=0; i<mbs; i++) {
1836       c->diag[i] = a->diag[i];
1837     }
1838   } else c->diag        = 0;
1839   c->nz                 = a->nz;
1840   c->maxnz              = a->maxnz;
1841   c->solve_work         = 0;
1842   C->spptr              = 0;     /* Dangerous -I'm throwing away a->spptr */
1843   c->mult_work          = 0;
1844   C->preallocated       = PETSC_TRUE;
1845   C->assembled          = PETSC_TRUE;
1846   *B = C;
1847   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1848   PetscFunctionReturn(0);
1849 }
1850 
1851 EXTERN_C_BEGIN
1852 #undef __FUNCT__
1853 #define __FUNCT__ "MatLoad_SeqBAIJ"
1854 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1855 {
1856   Mat_SeqBAIJ  *a;
1857   Mat          B;
1858   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1859   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1860   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1861   int          *masked,nmask,tmp,bs2,ishift;
1862   PetscScalar  *aa;
1863   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1864 
1865   PetscFunctionBegin;
1866   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1867   bs2  = bs*bs;
1868 
1869   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1870   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1871   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1872   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1873   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1874   M = header[1]; N = header[2]; nz = header[3];
1875 
1876   if (header[3] < 0) {
1877     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1878   }
1879 
1880   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1881 
1882   /*
1883      This code adds extra rows to make sure the number of rows is
1884     divisible by the blocksize
1885   */
1886   mbs        = M/bs;
1887   extra_rows = bs - M + bs*(mbs);
1888   if (extra_rows == bs) extra_rows = 0;
1889   else                  mbs++;
1890   if (extra_rows) {
1891     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1892   }
1893 
1894   /* read in row lengths */
1895   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1896   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1897   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1898 
1899   /* read in column indices */
1900   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1901   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1902   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1903 
1904   /* loop over row lengths determining block row lengths */
1905   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1906   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1907   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1908   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1909   masked   = mask + mbs;
1910   rowcount = 0; nzcount = 0;
1911   for (i=0; i<mbs; i++) {
1912     nmask = 0;
1913     for (j=0; j<bs; j++) {
1914       kmax = rowlengths[rowcount];
1915       for (k=0; k<kmax; k++) {
1916         tmp = jj[nzcount++]/bs;
1917         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1918       }
1919       rowcount++;
1920     }
1921     browlengths[i] += nmask;
1922     /* zero out the mask elements we set */
1923     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1924   }
1925 
1926   /* create our matrix */
1927   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
1928   ierr = MatSetType(B,type);CHKERRQ(ierr);
1929   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
1930   a = (Mat_SeqBAIJ*)B->data;
1931 
1932   /* set matrix "i" values */
1933   a->i[0] = 0;
1934   for (i=1; i<= mbs; i++) {
1935     a->i[i]      = a->i[i-1] + browlengths[i-1];
1936     a->ilen[i-1] = browlengths[i-1];
1937   }
1938   a->nz         = 0;
1939   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1940 
1941   /* read in nonzero values */
1942   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1943   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1944   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1945 
1946   /* set "a" and "j" values into matrix */
1947   nzcount = 0; jcount = 0;
1948   for (i=0; i<mbs; i++) {
1949     nzcountb = nzcount;
1950     nmask    = 0;
1951     for (j=0; j<bs; j++) {
1952       kmax = rowlengths[i*bs+j];
1953       for (k=0; k<kmax; k++) {
1954         tmp = jj[nzcount++]/bs;
1955 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1956       }
1957     }
1958     /* sort the masked values */
1959     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1960 
1961     /* set "j" values into matrix */
1962     maskcount = 1;
1963     for (j=0; j<nmask; j++) {
1964       a->j[jcount++]  = masked[j];
1965       mask[masked[j]] = maskcount++;
1966     }
1967     /* set "a" values into matrix */
1968     ishift = bs2*a->i[i];
1969     for (j=0; j<bs; j++) {
1970       kmax = rowlengths[i*bs+j];
1971       for (k=0; k<kmax; k++) {
1972         tmp       = jj[nzcountb]/bs ;
1973         block     = mask[tmp] - 1;
1974         point     = jj[nzcountb] - bs*tmp;
1975         idx       = ishift + bs2*block + j + bs*point;
1976         a->a[idx] = (MatScalar)aa[nzcountb++];
1977       }
1978     }
1979     /* zero out the mask elements we set */
1980     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1981   }
1982   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1983 
1984   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1985   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1986   ierr = PetscFree(aa);CHKERRQ(ierr);
1987   ierr = PetscFree(jj);CHKERRQ(ierr);
1988   ierr = PetscFree(mask);CHKERRQ(ierr);
1989 
1990   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1991   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1992   ierr = MatView_Private(B);CHKERRQ(ierr);
1993 
1994   *A = B;
1995   PetscFunctionReturn(0);
1996 }
1997 EXTERN_C_END
1998 
1999 #undef __FUNCT__
2000 #define __FUNCT__ "MatCreateSeqBAIJ"
2001 /*@C
2002    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2003    compressed row) format.  For good matrix assembly performance the
2004    user should preallocate the matrix storage by setting the parameter nz
2005    (or the array nnz).  By setting these parameters accurately, performance
2006    during matrix assembly can be increased by more than a factor of 50.
2007 
2008    Collective on MPI_Comm
2009 
2010    Input Parameters:
2011 +  comm - MPI communicator, set to PETSC_COMM_SELF
2012 .  bs - size of block
2013 .  m - number of rows
2014 .  n - number of columns
2015 .  nz - number of nonzero blocks  per block row (same for all rows)
2016 -  nnz - array containing the number of nonzero blocks in the various block rows
2017          (possibly different for each block row) or PETSC_NULL
2018 
2019    Output Parameter:
2020 .  A - the matrix
2021 
2022    Options Database Keys:
2023 .   -mat_no_unroll - uses code that does not unroll the loops in the
2024                      block calculations (much slower)
2025 .    -mat_block_size - size of the blocks to use
2026 
2027    Level: intermediate
2028 
2029    Notes:
2030    A nonzero block is any block that as 1 or more nonzeros in it
2031 
2032    The block AIJ format is fully compatible with standard Fortran 77
2033    storage.  That is, the stored row and column indices can begin at
2034    either one (as in Fortran) or zero.  See the users' manual for details.
2035 
2036    Specify the preallocated storage with either nz or nnz (not both).
2037    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2038    allocation.  For additional details, see the users manual chapter on
2039    matrices.
2040 
2041 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2042 @*/
2043 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
2044 {
2045   int ierr;
2046 
2047   PetscFunctionBegin;
2048   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2049   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2050   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2051   PetscFunctionReturn(0);
2052 }
2053 
2054 #undef __FUNCT__
2055 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2056 /*@C
2057    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2058    per row in the matrix. For good matrix assembly performance the
2059    user should preallocate the matrix storage by setting the parameter nz
2060    (or the array nnz).  By setting these parameters accurately, performance
2061    during matrix assembly can be increased by more than a factor of 50.
2062 
2063    Collective on MPI_Comm
2064 
2065    Input Parameters:
2066 +  A - the matrix
2067 .  bs - size of block
2068 .  nz - number of block nonzeros per block row (same for all rows)
2069 -  nnz - array containing the number of block nonzeros in the various block rows
2070          (possibly different for each block row) or PETSC_NULL
2071 
2072    Options Database Keys:
2073 .   -mat_no_unroll - uses code that does not unroll the loops in the
2074                      block calculations (much slower)
2075 .    -mat_block_size - size of the blocks to use
2076 
2077    Level: intermediate
2078 
2079    Notes:
2080    The block AIJ format is fully compatible with standard Fortran 77
2081    storage.  That is, the stored row and column indices can begin at
2082    either one (as in Fortran) or zero.  See the users' manual for details.
2083 
2084    Specify the preallocated storage with either nz or nnz (not both).
2085    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2086    allocation.  For additional details, see the users manual chapter on
2087    matrices.
2088 
2089 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2090 @*/
2091 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[])
2092 {
2093   int ierr,(*f)(Mat,int,int,const int[]);
2094 
2095   PetscFunctionBegin;
2096   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2097   if (f) {
2098     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2099   }
2100   PetscFunctionReturn(0);
2101 }
2102