xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 2fc52814d27bf1f4e71021c1c3ebb532b583ed60)
1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "petscsys.h"                     /*I "petscmat.h" I*/
11 
12 /*
13     Special version for Fun3d sequential benchmark
14 */
15 #if defined(PETSC_HAVE_FORTRAN_CAPS)
16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
18 #define matsetvaluesblocked4_ matsetvaluesblocked4
19 #endif
20 
21 EXTERN_C_BEGIN
22 #undef __FUNCT__
23 #define __FUNCT__ "matsetvaluesblocked4_"
24 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[])
25 {
26   Mat               A = *AA;
27   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
28   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
29   int               *ai=a->i,*ailen=a->ilen;
30   int               *aj=a->j,stepval;
31   const PetscScalar *value = v;
32   MatScalar         *ap,*aa = a->a,*bap;
33 
34   PetscFunctionBegin;
35   stepval = (n-1)*4;
36   for (k=0; k<m; k++) { /* loop over added rows */
37     row  = im[k];
38     rp   = aj + ai[row];
39     ap   = aa + 16*ai[row];
40     nrow = ailen[row];
41     low  = 0;
42     for (l=0; l<n; l++) { /* loop over added columns */
43       col = in[l];
44       value = v + k*(stepval+4)*4 + l*4;
45       low = 0; high = nrow;
46       while (high-low > 7) {
47         t = (low+high)/2;
48         if (rp[t] > col) high = t;
49         else             low  = t;
50       }
51       for (i=low; i<high; i++) {
52         if (rp[i] > col) break;
53         if (rp[i] == col) {
54           bap  = ap +  16*i;
55           for (ii=0; ii<4; ii++,value+=stepval) {
56             for (jj=ii; jj<16; jj+=4) {
57               bap[jj] += *value++;
58             }
59           }
60           goto noinsert2;
61         }
62       }
63       N = nrow++ - 1;
64       /* shift up all the later entries in this row */
65       for (ii=N; ii>=i; ii--) {
66         rp[ii+1] = rp[ii];
67         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
68       }
69       if (N >= i) {
70         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
71       }
72       rp[i] = col;
73       bap   = ap +  16*i;
74       for (ii=0; ii<4; ii++,value+=stepval) {
75         for (jj=ii; jj<16; jj+=4) {
76           bap[jj] = *value++;
77         }
78       }
79       noinsert2:;
80       low = i;
81     }
82     ailen[row] = nrow;
83   }
84 }
85 EXTERN_C_END
86 
87 #if defined(PETSC_HAVE_FORTRAN_CAPS)
88 #define matsetvalues4_ MATSETVALUES4
89 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
90 #define matsetvalues4_ matsetvalues4
91 #endif
92 
93 EXTERN_C_BEGIN
94 #undef __FUNCT__
95 #define __FUNCT__ "MatSetValues4_"
96 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
97 {
98   Mat         A = *AA;
99   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
100   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
101   int         *ai=a->i,*ailen=a->ilen;
102   int         *aj=a->j,brow,bcol;
103   int         ridx,cidx;
104   MatScalar   *ap,value,*aa=a->a,*bap;
105 
106   PetscFunctionBegin;
107   for (k=0; k<m; k++) { /* loop over added rows */
108     row  = im[k]; brow = row/4;
109     rp   = aj + ai[brow];
110     ap   = aa + 16*ai[brow];
111     nrow = ailen[brow];
112     low  = 0;
113     for (l=0; l<n; l++) { /* loop over added columns */
114       col = in[l]; bcol = col/4;
115       ridx = row % 4; cidx = col % 4;
116       value = v[l + k*n];
117       low = 0; high = nrow;
118       while (high-low > 7) {
119         t = (low+high)/2;
120         if (rp[t] > bcol) high = t;
121         else              low  = t;
122       }
123       for (i=low; i<high; i++) {
124         if (rp[i] > bcol) break;
125         if (rp[i] == bcol) {
126           bap  = ap +  16*i + 4*cidx + ridx;
127           *bap += value;
128           goto noinsert1;
129         }
130       }
131       N = nrow++ - 1;
132       /* shift up all the later entries in this row */
133       for (ii=N; ii>=i; ii--) {
134         rp[ii+1] = rp[ii];
135         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
136       }
137       if (N>=i) {
138         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
139       }
140       rp[i]                    = bcol;
141       ap[16*i + 4*cidx + ridx] = value;
142       noinsert1:;
143       low = i;
144     }
145     ailen[brow] = nrow;
146   }
147 }
148 EXTERN_C_END
149 
150 /*  UGLY, ugly, ugly
151    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
152    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
153    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
154    converts the entries into single precision and then calls ..._MatScalar() to put them
155    into the single precision data structures.
156 */
157 #if defined(PETSC_USE_MAT_SINGLE)
158 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
159 #else
160 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
161 #endif
162 
163 #define CHUNKSIZE  10
164 
165 /*
166      Checks for missing diagonals
167 */
168 #undef __FUNCT__
169 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
170 int MatMissingDiagonal_SeqBAIJ(Mat A)
171 {
172   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
173   int         *diag,*jj = a->j,i,ierr;
174 
175   PetscFunctionBegin;
176   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
177   diag = a->diag;
178   for (i=0; i<a->mbs; i++) {
179     if (jj[diag[i]] != i) {
180       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
181     }
182   }
183   PetscFunctionReturn(0);
184 }
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
188 int MatMarkDiagonal_SeqBAIJ(Mat A)
189 {
190   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
191   int         i,j,*diag,m = a->mbs,ierr;
192 
193   PetscFunctionBegin;
194   if (a->diag) PetscFunctionReturn(0);
195 
196   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
197   PetscLogObjectMemory(A,(m+1)*sizeof(int));
198   for (i=0; i<m; i++) {
199     diag[i] = a->i[i+1];
200     for (j=a->i[i]; j<a->i[i+1]; j++) {
201       if (a->j[j] == i) {
202         diag[i] = j;
203         break;
204       }
205     }
206   }
207   a->diag = diag;
208   PetscFunctionReturn(0);
209 }
210 
211 
212 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
216 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
217 {
218   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
219   int         ierr,n = a->mbs,i;
220 
221   PetscFunctionBegin;
222   *nn = n;
223   if (!ia) PetscFunctionReturn(0);
224   if (symmetric) {
225     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
226   } else if (oshift == 1) {
227     /* temporarily add 1 to i and j indices */
228     int nz = a->i[n];
229     for (i=0; i<nz; i++) a->j[i]++;
230     for (i=0; i<n+1; i++) a->i[i]++;
231     *ia = a->i; *ja = a->j;
232   } else {
233     *ia = a->i; *ja = a->j;
234   }
235 
236   PetscFunctionReturn(0);
237 }
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
241 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
242 {
243   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
244   int         i,n = a->mbs,ierr;
245 
246   PetscFunctionBegin;
247   if (!ia) PetscFunctionReturn(0);
248   if (symmetric) {
249     ierr = PetscFree(*ia);CHKERRQ(ierr);
250     ierr = PetscFree(*ja);CHKERRQ(ierr);
251   } else if (oshift == 1) {
252     int nz = a->i[n]-1;
253     for (i=0; i<nz; i++) a->j[i]--;
254     for (i=0; i<n+1; i++) a->i[i]--;
255   }
256   PetscFunctionReturn(0);
257 }
258 
259 #undef __FUNCT__
260 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
261 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
262 {
263   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
264 
265   PetscFunctionBegin;
266   *bs = baij->bs;
267   PetscFunctionReturn(0);
268 }
269 
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatDestroy_SeqBAIJ"
273 int MatDestroy_SeqBAIJ(Mat A)
274 {
275   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
276   int         ierr;
277 
278   PetscFunctionBegin;
279 #if defined(PETSC_USE_LOG)
280   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
281 #endif
282   ierr = PetscFree(a->a);CHKERRQ(ierr);
283   if (!a->singlemalloc) {
284     ierr = PetscFree(a->i);CHKERRQ(ierr);
285     ierr = PetscFree(a->j);CHKERRQ(ierr);
286   }
287   if (a->row) {
288     ierr = ISDestroy(a->row);CHKERRQ(ierr);
289   }
290   if (a->col) {
291     ierr = ISDestroy(a->col);CHKERRQ(ierr);
292   }
293   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
294   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
295   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
296   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
297   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
298   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
299   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
300 #if defined(PETSC_USE_MAT_SINGLE)
301   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
302 #endif
303   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
304 
305   ierr = PetscFree(a);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatSetOption_SeqBAIJ"
311 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
312 {
313   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
314 
315   PetscFunctionBegin;
316   switch (op) {
317   case MAT_ROW_ORIENTED:
318     a->roworiented    = PETSC_TRUE;
319     break;
320   case MAT_COLUMN_ORIENTED:
321     a->roworiented    = PETSC_FALSE;
322     break;
323   case MAT_COLUMNS_SORTED:
324     a->sorted         = PETSC_TRUE;
325     break;
326   case MAT_COLUMNS_UNSORTED:
327     a->sorted         = PETSC_FALSE;
328     break;
329   case MAT_KEEP_ZEROED_ROWS:
330     a->keepzeroedrows = PETSC_TRUE;
331     break;
332   case MAT_NO_NEW_NONZERO_LOCATIONS:
333     a->nonew          = 1;
334     break;
335   case MAT_NEW_NONZERO_LOCATION_ERR:
336     a->nonew          = -1;
337     break;
338   case MAT_NEW_NONZERO_ALLOCATION_ERR:
339     a->nonew          = -2;
340     break;
341   case MAT_YES_NEW_NONZERO_LOCATIONS:
342     a->nonew          = 0;
343     break;
344   case MAT_ROWS_SORTED:
345   case MAT_ROWS_UNSORTED:
346   case MAT_YES_NEW_DIAGONALS:
347   case MAT_IGNORE_OFF_PROC_ENTRIES:
348   case MAT_USE_HASH_TABLE:
349     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
350     break;
351   case MAT_NO_NEW_DIAGONALS:
352     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
353   case MAT_SYMMETRIC:
354   case MAT_STRUCTURALLY_SYMMETRIC:
355   case MAT_NOT_SYMMETRIC:
356   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
357   case MAT_HERMITIAN:
358   case MAT_NOT_HERMITIAN:
359   case MAT_SYMMETRY_ETERNAL:
360   case MAT_NOT_SYMMETRY_ETERNAL:
361     break;
362   default:
363     SETERRQ(PETSC_ERR_SUP,"unknown option");
364   }
365   PetscFunctionReturn(0);
366 }
367 
368 #undef __FUNCT__
369 #define __FUNCT__ "MatGetRow_SeqBAIJ"
370 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
371 {
372   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
373   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
374   MatScalar    *aa,*aa_i;
375   PetscScalar  *v_i;
376 
377   PetscFunctionBegin;
378   bs  = a->bs;
379   ai  = a->i;
380   aj  = a->j;
381   aa  = a->a;
382   bs2 = a->bs2;
383 
384   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row);
385 
386   bn  = row/bs;   /* Block number */
387   bp  = row % bs; /* Block Position */
388   M   = ai[bn+1] - ai[bn];
389   *nz = bs*M;
390 
391   if (v) {
392     *v = 0;
393     if (*nz) {
394       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
395       for (i=0; i<M; i++) { /* for each block in the block row */
396         v_i  = *v + i*bs;
397         aa_i = aa + bs2*(ai[bn] + i);
398         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
399       }
400     }
401   }
402 
403   if (idx) {
404     *idx = 0;
405     if (*nz) {
406       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
407       for (i=0; i<M; i++) { /* for each block in the block row */
408         idx_i = *idx + i*bs;
409         itmp  = bs*aj[ai[bn] + i];
410         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
411       }
412     }
413   }
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNCT__
418 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
419 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
420 {
421   int ierr;
422 
423   PetscFunctionBegin;
424   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
425   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatTranspose_SeqBAIJ"
431 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
432 {
433   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
434   Mat         C;
435   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
436   int         *rows,*cols,bs2=a->bs2;
437   PetscScalar *array;
438 
439   PetscFunctionBegin;
440   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
441   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
442   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
443 
444 #if defined(PETSC_USE_MAT_SINGLE)
445   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
446   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
447 #else
448   array = a->a;
449 #endif
450 
451   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
452   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
453   ierr = PetscFree(col);CHKERRQ(ierr);
454   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
455   cols = rows + bs;
456   for (i=0; i<mbs; i++) {
457     cols[0] = i*bs;
458     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
459     len = ai[i+1] - ai[i];
460     for (j=0; j<len; j++) {
461       rows[0] = (*aj++)*bs;
462       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
463       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
464       array += bs2;
465     }
466   }
467   ierr = PetscFree(rows);CHKERRQ(ierr);
468 #if defined(PETSC_USE_MAT_SINGLE)
469   ierr = PetscFree(array);
470 #endif
471 
472   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
473   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
474 
475   if (B) {
476     *B = C;
477   } else {
478     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
479   }
480   PetscFunctionReturn(0);
481 }
482 
483 #undef __FUNCT__
484 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
485 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
486 {
487   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
488   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
489   PetscScalar *aa;
490   FILE        *file;
491 
492   PetscFunctionBegin;
493   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
494   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
495   col_lens[0] = MAT_FILE_COOKIE;
496 
497   col_lens[1] = A->m;
498   col_lens[2] = A->n;
499   col_lens[3] = a->nz*bs2;
500 
501   /* store lengths of each row and write (including header) to file */
502   count = 0;
503   for (i=0; i<a->mbs; i++) {
504     for (j=0; j<bs; j++) {
505       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
506     }
507   }
508   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
509   ierr = PetscFree(col_lens);CHKERRQ(ierr);
510 
511   /* store column indices (zero start index) */
512   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
513   count = 0;
514   for (i=0; i<a->mbs; i++) {
515     for (j=0; j<bs; j++) {
516       for (k=a->i[i]; k<a->i[i+1]; k++) {
517         for (l=0; l<bs; l++) {
518           jj[count++] = bs*a->j[k] + l;
519         }
520       }
521     }
522   }
523   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
524   ierr = PetscFree(jj);CHKERRQ(ierr);
525 
526   /* store nonzero values */
527   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
528   count = 0;
529   for (i=0; i<a->mbs; i++) {
530     for (j=0; j<bs; j++) {
531       for (k=a->i[i]; k<a->i[i+1]; k++) {
532         for (l=0; l<bs; l++) {
533           aa[count++] = a->a[bs2*k + l*bs + j];
534         }
535       }
536     }
537   }
538   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
539   ierr = PetscFree(aa);CHKERRQ(ierr);
540 
541   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
542   if (file) {
543     fprintf(file,"-matload_block_size %d\n",a->bs);
544   }
545   PetscFunctionReturn(0);
546 }
547 
548 #undef __FUNCT__
549 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
550 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
551 {
552   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
553   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
554   PetscViewerFormat format;
555 
556   PetscFunctionBegin;
557   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
558   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
559     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
560   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
561     Mat aij;
562     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
563     ierr = MatView(aij,viewer);CHKERRQ(ierr);
564     ierr = MatDestroy(aij);CHKERRQ(ierr);
565   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
566      PetscFunctionReturn(0);
567   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
568     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
569     for (i=0; i<a->mbs; i++) {
570       for (j=0; j<bs; j++) {
571         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
572         for (k=a->i[i]; k<a->i[i+1]; k++) {
573           for (l=0; l<bs; l++) {
574 #if defined(PETSC_USE_COMPLEX)
575             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
576               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
577                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
578             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
579               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
580                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
581             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
582               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
583             }
584 #else
585             if (a->a[bs2*k + l*bs + j] != 0.0) {
586               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
587             }
588 #endif
589           }
590         }
591         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
592       }
593     }
594     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
595   } else {
596     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
597     for (i=0; i<a->mbs; i++) {
598       for (j=0; j<bs; j++) {
599         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
600         for (k=a->i[i]; k<a->i[i+1]; k++) {
601           for (l=0; l<bs; l++) {
602 #if defined(PETSC_USE_COMPLEX)
603             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
604               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
605                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
606             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
607               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
608                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
609             } else {
610               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
611             }
612 #else
613             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
614 #endif
615           }
616         }
617         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
618       }
619     }
620     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
621   }
622   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
628 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
629 {
630   Mat          A = (Mat) Aa;
631   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
632   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
633   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
634   MatScalar    *aa;
635   PetscViewer  viewer;
636 
637   PetscFunctionBegin;
638 
639   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
640   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
641 
642   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
643 
644   /* loop over matrix elements drawing boxes */
645   color = PETSC_DRAW_BLUE;
646   for (i=0,row=0; i<mbs; i++,row+=bs) {
647     for (j=a->i[i]; j<a->i[i+1]; j++) {
648       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
649       x_l = a->j[j]*bs; x_r = x_l + 1.0;
650       aa = a->a + j*bs2;
651       for (k=0; k<bs; k++) {
652         for (l=0; l<bs; l++) {
653           if (PetscRealPart(*aa++) >=  0.) continue;
654           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
655         }
656       }
657     }
658   }
659   color = PETSC_DRAW_CYAN;
660   for (i=0,row=0; i<mbs; i++,row+=bs) {
661     for (j=a->i[i]; j<a->i[i+1]; j++) {
662       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
663       x_l = a->j[j]*bs; x_r = x_l + 1.0;
664       aa = a->a + j*bs2;
665       for (k=0; k<bs; k++) {
666         for (l=0; l<bs; l++) {
667           if (PetscRealPart(*aa++) != 0.) continue;
668           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
669         }
670       }
671     }
672   }
673 
674   color = PETSC_DRAW_RED;
675   for (i=0,row=0; i<mbs; i++,row+=bs) {
676     for (j=a->i[i]; j<a->i[i+1]; j++) {
677       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
678       x_l = a->j[j]*bs; x_r = x_l + 1.0;
679       aa = a->a + j*bs2;
680       for (k=0; k<bs; k++) {
681         for (l=0; l<bs; l++) {
682           if (PetscRealPart(*aa++) <= 0.) continue;
683           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
684         }
685       }
686     }
687   }
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
693 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
694 {
695   int          ierr;
696   PetscReal    xl,yl,xr,yr,w,h;
697   PetscDraw    draw;
698   PetscTruth   isnull;
699 
700   PetscFunctionBegin;
701 
702   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
703   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
704 
705   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
706   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
707   xr += w;    yr += h;  xl = -w;     yl = -h;
708   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
709   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
710   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
711   PetscFunctionReturn(0);
712 }
713 
714 #undef __FUNCT__
715 #define __FUNCT__ "MatView_SeqBAIJ"
716 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
717 {
718   int        ierr;
719   PetscTruth issocket,isascii,isbinary,isdraw;
720 
721   PetscFunctionBegin;
722   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
723   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
724   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
725   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
726   if (issocket) {
727     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
728   } else if (isascii){
729     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
730   } else if (isbinary) {
731     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
732   } else if (isdraw) {
733     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
734   } else {
735     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "MatGetValues_SeqBAIJ"
743 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
744 {
745   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
746   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
747   int        *ai = a->i,*ailen = a->ilen;
748   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
749   MatScalar  *ap,*aa = a->a,zero = 0.0;
750 
751   PetscFunctionBegin;
752   for (k=0; k<m; k++) { /* loop over rows */
753     row  = im[k]; brow = row/bs;
754     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
755     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row);
756     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
757     nrow = ailen[brow];
758     for (l=0; l<n; l++) { /* loop over columns */
759       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
760       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]);
761       col  = in[l] ;
762       bcol = col/bs;
763       cidx = col%bs;
764       ridx = row%bs;
765       high = nrow;
766       low  = 0; /* assume unsorted */
767       while (high-low > 5) {
768         t = (low+high)/2;
769         if (rp[t] > bcol) high = t;
770         else             low  = t;
771       }
772       for (i=low; i<high; i++) {
773         if (rp[i] > bcol) break;
774         if (rp[i] == bcol) {
775           *v++ = ap[bs2*i+bs*cidx+ridx];
776           goto finished;
777         }
778       }
779       *v++ = zero;
780       finished:;
781     }
782   }
783   PetscFunctionReturn(0);
784 }
785 
786 #if defined(PETSC_USE_MAT_SINGLE)
787 #undef __FUNCT__
788 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
789 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
790 {
791   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
792   int         ierr,i,N = m*n*b->bs2;
793   MatScalar   *vsingle;
794 
795   PetscFunctionBegin;
796   if (N > b->setvalueslen) {
797     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
798     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
799     b->setvalueslen  = N;
800   }
801   vsingle = b->setvaluescopy;
802   for (i=0; i<N; i++) {
803     vsingle[i] = v[i];
804   }
805   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
806   PetscFunctionReturn(0);
807 }
808 #endif
809 
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
813 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is)
814 {
815   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
816   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
817   int               *imax=a->imax,*ai=a->i,*ailen=a->ilen;
818   int               *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
819   PetscTruth        roworiented=a->roworiented;
820   const MatScalar   *value = v;
821   MatScalar         *ap,*aa = a->a,*bap;
822 
823   PetscFunctionBegin;
824   if (roworiented) {
825     stepval = (n-1)*bs;
826   } else {
827     stepval = (m-1)*bs;
828   }
829   for (k=0; k<m; k++) { /* loop over added rows */
830     row  = im[k];
831     if (row < 0) continue;
832 #if defined(PETSC_USE_BOPT_g)
833     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
834 #endif
835     rp   = aj + ai[row];
836     ap   = aa + bs2*ai[row];
837     rmax = imax[row];
838     nrow = ailen[row];
839     low  = 0;
840     for (l=0; l<n; l++) { /* loop over added columns */
841       if (in[l] < 0) continue;
842 #if defined(PETSC_USE_BOPT_g)
843       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1);
844 #endif
845       col = in[l];
846       if (roworiented) {
847         value = v + k*(stepval+bs)*bs + l*bs;
848       } else {
849         value = v + l*(stepval+bs)*bs + k*bs;
850       }
851       if (!sorted) low = 0; high = nrow;
852       while (high-low > 7) {
853         t = (low+high)/2;
854         if (rp[t] > col) high = t;
855         else             low  = t;
856       }
857       for (i=low; i<high; i++) {
858         if (rp[i] > col) break;
859         if (rp[i] == col) {
860           bap  = ap +  bs2*i;
861           if (roworiented) {
862             if (is == ADD_VALUES) {
863               for (ii=0; ii<bs; ii++,value+=stepval) {
864                 for (jj=ii; jj<bs2; jj+=bs) {
865                   bap[jj] += *value++;
866                 }
867               }
868             } else {
869               for (ii=0; ii<bs; ii++,value+=stepval) {
870                 for (jj=ii; jj<bs2; jj+=bs) {
871                   bap[jj] = *value++;
872                 }
873               }
874             }
875           } else {
876             if (is == ADD_VALUES) {
877               for (ii=0; ii<bs; ii++,value+=stepval) {
878                 for (jj=0; jj<bs; jj++) {
879                   *bap++ += *value++;
880                 }
881               }
882             } else {
883               for (ii=0; ii<bs; ii++,value+=stepval) {
884                 for (jj=0; jj<bs; jj++) {
885                   *bap++  = *value++;
886                 }
887               }
888             }
889           }
890           goto noinsert2;
891         }
892       }
893       if (nonew == 1) goto noinsert2;
894       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
895       if (nrow >= rmax) {
896         /* there is no extra room in row, therefore enlarge */
897         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
898         MatScalar *new_a;
899 
900         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
901 
902         /* malloc new storage space */
903         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
904 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
905         new_j   = (int*)(new_a + bs2*new_nz);
906         new_i   = new_j + new_nz;
907 
908         /* copy over old data into new slots */
909         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
910         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
911         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
912         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
913         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
914         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
915         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
916         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
917         /* free up old matrix storage */
918         ierr = PetscFree(a->a);CHKERRQ(ierr);
919         if (!a->singlemalloc) {
920           ierr = PetscFree(a->i);CHKERRQ(ierr);
921           ierr = PetscFree(a->j);CHKERRQ(ierr);
922         }
923         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
924         a->singlemalloc = PETSC_TRUE;
925 
926         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
927         rmax = imax[row] = imax[row] + CHUNKSIZE;
928         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
929         a->maxnz += bs2*CHUNKSIZE;
930         a->reallocs++;
931         a->nz++;
932       }
933       N = nrow++ - 1;
934       /* shift up all the later entries in this row */
935       for (ii=N; ii>=i; ii--) {
936         rp[ii+1] = rp[ii];
937         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
938       }
939       if (N >= i) {
940         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
941       }
942       rp[i] = col;
943       bap   = ap +  bs2*i;
944       if (roworiented) {
945         for (ii=0; ii<bs; ii++,value+=stepval) {
946           for (jj=ii; jj<bs2; jj+=bs) {
947             bap[jj] = *value++;
948           }
949         }
950       } else {
951         for (ii=0; ii<bs; ii++,value+=stepval) {
952           for (jj=0; jj<bs; jj++) {
953             *bap++  = *value++;
954           }
955         }
956       }
957       noinsert2:;
958       low = i;
959     }
960     ailen[row] = nrow;
961   }
962   PetscFunctionReturn(0);
963 }
964 
965 #undef __FUNCT__
966 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
967 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
968 {
969   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
970   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
971   int        m = A->m,*ip,N,*ailen = a->ilen;
972   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
973   MatScalar  *aa = a->a,*ap;
974 
975   PetscFunctionBegin;
976   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
977 
978   if (m) rmax = ailen[0];
979   for (i=1; i<mbs; i++) {
980     /* move each row back by the amount of empty slots (fshift) before it*/
981     fshift += imax[i-1] - ailen[i-1];
982     rmax   = PetscMax(rmax,ailen[i]);
983     if (fshift) {
984       ip = aj + ai[i]; ap = aa + bs2*ai[i];
985       N = ailen[i];
986       for (j=0; j<N; j++) {
987         ip[j-fshift] = ip[j];
988         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
989       }
990     }
991     ai[i] = ai[i-1] + ailen[i-1];
992   }
993   if (mbs) {
994     fshift += imax[mbs-1] - ailen[mbs-1];
995     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
996   }
997   /* reset ilen and imax for each row */
998   for (i=0; i<mbs; i++) {
999     ailen[i] = imax[i] = ai[i+1] - ai[i];
1000   }
1001   a->nz = ai[mbs];
1002 
1003   /* diagonals may have moved, so kill the diagonal pointers */
1004   if (fshift && a->diag) {
1005     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1006     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
1007     a->diag = 0;
1008   }
1009   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1010   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1011   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1012   a->reallocs          = 0;
1013   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1014 
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 
1019 
1020 /*
1021    This function returns an array of flags which indicate the locations of contiguous
1022    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1023    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1024    Assume: sizes should be long enough to hold all the values.
1025 */
1026 #undef __FUNCT__
1027 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1028 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1029 {
1030   int        i,j,k,row;
1031   PetscTruth flg;
1032 
1033   PetscFunctionBegin;
1034   for (i=0,j=0; i<n; j++) {
1035     row = idx[i];
1036     if (row%bs!=0) { /* Not the begining of a block */
1037       sizes[j] = 1;
1038       i++;
1039     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1040       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1041       i++;
1042     } else { /* Begining of the block, so check if the complete block exists */
1043       flg = PETSC_TRUE;
1044       for (k=1; k<bs; k++) {
1045         if (row+k != idx[i+k]) { /* break in the block */
1046           flg = PETSC_FALSE;
1047           break;
1048         }
1049       }
1050       if (flg == PETSC_TRUE) { /* No break in the bs */
1051         sizes[j] = bs;
1052         i+= bs;
1053       } else {
1054         sizes[j] = 1;
1055         i++;
1056       }
1057     }
1058   }
1059   *bs_max = j;
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 #undef __FUNCT__
1064 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1065 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1066 {
1067   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1068   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
1069   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1070   PetscScalar zero = 0.0;
1071   MatScalar   *aa;
1072 
1073   PetscFunctionBegin;
1074   /* Make a copy of the IS and  sort it */
1075   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1076   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1077 
1078   /* allocate memory for rows,sizes */
1079   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
1080   sizes = rows + is_n;
1081 
1082   /* copy IS values to rows, and sort them */
1083   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1084   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1085   if (baij->keepzeroedrows) {
1086     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1087     bs_max = is_n;
1088   } else {
1089     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1090   }
1091   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1092 
1093   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1094     row   = rows[j];
1095     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1096     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1097     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1098     if (sizes[i] == bs && !baij->keepzeroedrows) {
1099       if (diag) {
1100         if (baij->ilen[row/bs] > 0) {
1101           baij->ilen[row/bs]       = 1;
1102           baij->j[baij->i[row/bs]] = row/bs;
1103           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1104         }
1105         /* Now insert all the diagonal values for this bs */
1106         for (k=0; k<bs; k++) {
1107           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1108         }
1109       } else { /* (!diag) */
1110         baij->ilen[row/bs] = 0;
1111       } /* end (!diag) */
1112     } else { /* (sizes[i] != bs) */
1113 #if defined (PETSC_USE_DEBUG)
1114       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1115 #endif
1116       for (k=0; k<count; k++) {
1117         aa[0] =  zero;
1118         aa    += bs;
1119       }
1120       if (diag) {
1121         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1122       }
1123     }
1124   }
1125 
1126   ierr = PetscFree(rows);CHKERRQ(ierr);
1127   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 #undef __FUNCT__
1132 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1133 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
1134 {
1135   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1136   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1137   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1138   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1139   int         ridx,cidx,bs2=a->bs2,ierr;
1140   PetscTruth  roworiented=a->roworiented;
1141   MatScalar   *ap,value,*aa=a->a,*bap;
1142 
1143   PetscFunctionBegin;
1144   for (k=0; k<m; k++) { /* loop over added rows */
1145     row  = im[k]; brow = row/bs;
1146     if (row < 0) continue;
1147 #if defined(PETSC_USE_BOPT_g)
1148     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
1149 #endif
1150     rp   = aj + ai[brow];
1151     ap   = aa + bs2*ai[brow];
1152     rmax = imax[brow];
1153     nrow = ailen[brow];
1154     low  = 0;
1155     for (l=0; l<n; l++) { /* loop over added columns */
1156       if (in[l] < 0) continue;
1157 #if defined(PETSC_USE_BOPT_g)
1158       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
1159 #endif
1160       col = in[l]; bcol = col/bs;
1161       ridx = row % bs; cidx = col % bs;
1162       if (roworiented) {
1163         value = v[l + k*n];
1164       } else {
1165         value = v[k + l*m];
1166       }
1167       if (!sorted) low = 0; high = nrow;
1168       while (high-low > 7) {
1169         t = (low+high)/2;
1170         if (rp[t] > bcol) high = t;
1171         else              low  = t;
1172       }
1173       for (i=low; i<high; i++) {
1174         if (rp[i] > bcol) break;
1175         if (rp[i] == bcol) {
1176           bap  = ap +  bs2*i + bs*cidx + ridx;
1177           if (is == ADD_VALUES) *bap += value;
1178           else                  *bap  = value;
1179           goto noinsert1;
1180         }
1181       }
1182       if (nonew == 1) goto noinsert1;
1183       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1184       if (nrow >= rmax) {
1185         /* there is no extra room in row, therefore enlarge */
1186         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1187         MatScalar *new_a;
1188 
1189         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1190 
1191         /* Malloc new storage space */
1192         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1193 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1194         new_j   = (int*)(new_a + bs2*new_nz);
1195         new_i   = new_j + new_nz;
1196 
1197         /* copy over old data into new slots */
1198         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1199         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1200         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1201         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1202         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1203         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1204         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1205         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1206         /* free up old matrix storage */
1207         ierr = PetscFree(a->a);CHKERRQ(ierr);
1208         if (!a->singlemalloc) {
1209           ierr = PetscFree(a->i);CHKERRQ(ierr);
1210           ierr = PetscFree(a->j);CHKERRQ(ierr);
1211         }
1212         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1213         a->singlemalloc = PETSC_TRUE;
1214 
1215         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1216         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1217         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1218         a->maxnz += bs2*CHUNKSIZE;
1219         a->reallocs++;
1220         a->nz++;
1221       }
1222       N = nrow++ - 1;
1223       /* shift up all the later entries in this row */
1224       for (ii=N; ii>=i; ii--) {
1225         rp[ii+1] = rp[ii];
1226         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1227       }
1228       if (N>=i) {
1229         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1230       }
1231       rp[i]                      = bcol;
1232       ap[bs2*i + bs*cidx + ridx] = value;
1233       noinsert1:;
1234       low = i;
1235     }
1236     ailen[brow] = nrow;
1237   }
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1244 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1245 {
1246   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1247   Mat         outA;
1248   int         ierr;
1249   PetscTruth  row_identity,col_identity;
1250 
1251   PetscFunctionBegin;
1252   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1253   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1254   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1255   if (!row_identity || !col_identity) {
1256     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1257   }
1258 
1259   outA          = inA;
1260   inA->factor   = FACTOR_LU;
1261 
1262   if (!a->diag) {
1263     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1264   }
1265 
1266   a->row        = row;
1267   a->col        = col;
1268   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1269   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1270 
1271   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1272   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1273   PetscLogObjectParent(inA,a->icol);
1274 
1275   /*
1276       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1277       for ILU(0) factorization with natural ordering
1278   */
1279   if (a->bs < 8) {
1280     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1281   } else {
1282     if (!a->solve_work) {
1283       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1284       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1285     }
1286   }
1287 
1288   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1289 
1290   PetscFunctionReturn(0);
1291 }
1292 #undef __FUNCT__
1293 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1294 int MatPrintHelp_SeqBAIJ(Mat A)
1295 {
1296   static PetscTruth called = PETSC_FALSE;
1297   MPI_Comm          comm = A->comm;
1298   int               ierr;
1299 
1300   PetscFunctionBegin;
1301   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1302   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1303   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 EXTERN_C_BEGIN
1308 #undef __FUNCT__
1309 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1310 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1311 {
1312   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1313   int         i,nz,nbs;
1314 
1315   PetscFunctionBegin;
1316   nz  = baij->maxnz/baij->bs2;
1317   nbs = baij->nbs;
1318   for (i=0; i<nz; i++) {
1319     baij->j[i] = indices[i];
1320   }
1321   baij->nz = nz;
1322   for (i=0; i<nbs; i++) {
1323     baij->ilen[i] = baij->imax[i];
1324   }
1325 
1326   PetscFunctionReturn(0);
1327 }
1328 EXTERN_C_END
1329 
1330 #undef __FUNCT__
1331 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1332 /*@
1333     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1334        in the matrix.
1335 
1336   Input Parameters:
1337 +  mat - the SeqBAIJ matrix
1338 -  indices - the column indices
1339 
1340   Level: advanced
1341 
1342   Notes:
1343     This can be called if you have precomputed the nonzero structure of the
1344   matrix and want to provide it to the matrix object to improve the performance
1345   of the MatSetValues() operation.
1346 
1347     You MUST have set the correct numbers of nonzeros per row in the call to
1348   MatCreateSeqBAIJ().
1349 
1350     MUST be called before any calls to MatSetValues();
1351 
1352 @*/
1353 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1354 {
1355   int ierr,(*f)(Mat,int *);
1356 
1357   PetscFunctionBegin;
1358   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1359   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1360   if (f) {
1361     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1362   } else {
1363     SETERRQ(1,"Wrong type of matrix to set column indices");
1364   }
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #undef __FUNCT__
1369 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1370 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1371 {
1372   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1373   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1374   PetscReal    atmp;
1375   PetscScalar  *x,zero = 0.0;
1376   MatScalar    *aa;
1377   int          ncols,brow,krow,kcol;
1378 
1379   PetscFunctionBegin;
1380   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1381   bs   = a->bs;
1382   aa   = a->a;
1383   ai   = a->i;
1384   aj   = a->j;
1385   mbs = a->mbs;
1386 
1387   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1388   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1389   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1390   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1391   for (i=0; i<mbs; i++) {
1392     ncols = ai[1] - ai[0]; ai++;
1393     brow  = bs*i;
1394     for (j=0; j<ncols; j++){
1395       /* bcol = bs*(*aj); */
1396       for (kcol=0; kcol<bs; kcol++){
1397         for (krow=0; krow<bs; krow++){
1398           atmp = PetscAbsScalar(*aa); aa++;
1399           row = brow + krow;    /* row index */
1400           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1401           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1402         }
1403       }
1404       aj++;
1405     }
1406   }
1407   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1413 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1414 {
1415   int        ierr;
1416 
1417   PetscFunctionBegin;
1418   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1424 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1425 {
1426   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1427   PetscFunctionBegin;
1428   *array = a->a;
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1434 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1435 {
1436   PetscFunctionBegin;
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #include "petscblaslapack.h"
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1443 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1444 {
1445   Mat_SeqBAIJ  *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1446   int          ierr,one=1,i,bs=y->bs,j,bs2;
1447 
1448   PetscFunctionBegin;
1449   if (str == SAME_NONZERO_PATTERN) {
1450     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1451   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1452     if (y->xtoy && y->XtoY != X) {
1453       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1454       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1455     }
1456     if (!y->xtoy) { /* get xtoy */
1457       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1458       y->XtoY = X;
1459     }
1460     bs2 = bs*bs;
1461     for (i=0; i<x->nz; i++) {
1462       j = 0;
1463       while (j < bs2){
1464         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1465         j++;
1466       }
1467     }
1468     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1469   } else {
1470     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1471   }
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 /* -------------------------------------------------------------------*/
1476 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1477        MatGetRow_SeqBAIJ,
1478        MatRestoreRow_SeqBAIJ,
1479        MatMult_SeqBAIJ_N,
1480 /* 4*/ MatMultAdd_SeqBAIJ_N,
1481        MatMultTranspose_SeqBAIJ,
1482        MatMultTransposeAdd_SeqBAIJ,
1483        MatSolve_SeqBAIJ_N,
1484        0,
1485        0,
1486 /*10*/ 0,
1487        MatLUFactor_SeqBAIJ,
1488        0,
1489        0,
1490        MatTranspose_SeqBAIJ,
1491 /*15*/ MatGetInfo_SeqBAIJ,
1492        MatEqual_SeqBAIJ,
1493        MatGetDiagonal_SeqBAIJ,
1494        MatDiagonalScale_SeqBAIJ,
1495        MatNorm_SeqBAIJ,
1496 /*20*/ 0,
1497        MatAssemblyEnd_SeqBAIJ,
1498        0,
1499        MatSetOption_SeqBAIJ,
1500        MatZeroEntries_SeqBAIJ,
1501 /*25*/ MatZeroRows_SeqBAIJ,
1502        MatLUFactorSymbolic_SeqBAIJ,
1503        MatLUFactorNumeric_SeqBAIJ_N,
1504        0,
1505        0,
1506 /*30*/ MatSetUpPreallocation_SeqBAIJ,
1507        MatILUFactorSymbolic_SeqBAIJ,
1508        0,
1509        MatGetArray_SeqBAIJ,
1510        MatRestoreArray_SeqBAIJ,
1511 /*35*/ MatDuplicate_SeqBAIJ,
1512        0,
1513        0,
1514        MatILUFactor_SeqBAIJ,
1515        0,
1516 /*40*/ MatAXPY_SeqBAIJ,
1517        MatGetSubMatrices_SeqBAIJ,
1518        MatIncreaseOverlap_SeqBAIJ,
1519        MatGetValues_SeqBAIJ,
1520        0,
1521 /*45*/ MatPrintHelp_SeqBAIJ,
1522        MatScale_SeqBAIJ,
1523        0,
1524        0,
1525        0,
1526 /*50*/ MatGetBlockSize_SeqBAIJ,
1527        MatGetRowIJ_SeqBAIJ,
1528        MatRestoreRowIJ_SeqBAIJ,
1529        0,
1530        0,
1531 /*55*/ 0,
1532        0,
1533        0,
1534        0,
1535        MatSetValuesBlocked_SeqBAIJ,
1536 /*60*/ MatGetSubMatrix_SeqBAIJ,
1537        MatDestroy_SeqBAIJ,
1538        MatView_SeqBAIJ,
1539        MatGetPetscMaps_Petsc,
1540        0,
1541 /*65*/ 0,
1542        0,
1543        0,
1544        0,
1545        0,
1546 /*70*/ MatGetRowMax_SeqBAIJ,
1547        MatConvert_Basic,
1548        0,
1549        0,
1550        0,
1551 /*75*/ 0,
1552        0,
1553        0,
1554        0,
1555        0,
1556 /*80*/ 0,
1557        0,
1558        0,
1559        0,
1560        0,
1561 /*85*/ MatLoad_SeqBAIJ
1562 };
1563 
1564 EXTERN_C_BEGIN
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1567 int MatStoreValues_SeqBAIJ(Mat mat)
1568 {
1569   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1570   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1571   int         ierr;
1572 
1573   PetscFunctionBegin;
1574   if (aij->nonew != 1) {
1575     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1576   }
1577 
1578   /* allocate space for values if not already there */
1579   if (!aij->saved_values) {
1580     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1581   }
1582 
1583   /* copy values over */
1584   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1585   PetscFunctionReturn(0);
1586 }
1587 EXTERN_C_END
1588 
1589 EXTERN_C_BEGIN
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1592 int MatRetrieveValues_SeqBAIJ(Mat mat)
1593 {
1594   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1595   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1596 
1597   PetscFunctionBegin;
1598   if (aij->nonew != 1) {
1599     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1600   }
1601   if (!aij->saved_values) {
1602     SETERRQ(1,"Must call MatStoreValues(A);first");
1603   }
1604 
1605   /* copy values over */
1606   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1607   PetscFunctionReturn(0);
1608 }
1609 EXTERN_C_END
1610 
1611 EXTERN_C_BEGIN
1612 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*);
1613 EXTERN_C_END
1614 
1615 EXTERN_C_BEGIN
1616 #undef __FUNCT__
1617 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
1618 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz)
1619 {
1620   Mat_SeqBAIJ *b;
1621   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1622   PetscTruth  flg;
1623 
1624   PetscFunctionBegin;
1625 
1626   B->preallocated = PETSC_TRUE;
1627   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1628   if (nnz && newbs != bs) {
1629     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1630   }
1631   bs   = newbs;
1632 
1633   mbs  = B->m/bs;
1634   nbs  = B->n/bs;
1635   bs2  = bs*bs;
1636 
1637   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1638     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1639   }
1640 
1641   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1642   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1643   if (nnz) {
1644     for (i=0; i<mbs; i++) {
1645       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1646       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1647     }
1648   }
1649 
1650   b       = (Mat_SeqBAIJ*)B->data;
1651   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1652   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1653   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1654   if (!flg) {
1655     switch (bs) {
1656     case 1:
1657       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1658       B->ops->mult            = MatMult_SeqBAIJ_1;
1659       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1660       break;
1661     case 2:
1662       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1663       B->ops->mult            = MatMult_SeqBAIJ_2;
1664       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1665       break;
1666     case 3:
1667       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1668       B->ops->mult            = MatMult_SeqBAIJ_3;
1669       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1670       break;
1671     case 4:
1672       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1673       B->ops->mult            = MatMult_SeqBAIJ_4;
1674       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1675       break;
1676     case 5:
1677       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1678       B->ops->mult            = MatMult_SeqBAIJ_5;
1679       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1680       break;
1681     case 6:
1682       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1683       B->ops->mult            = MatMult_SeqBAIJ_6;
1684       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1685       break;
1686     case 7:
1687       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1688       B->ops->mult            = MatMult_SeqBAIJ_7;
1689       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1690       break;
1691     default:
1692       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1693       B->ops->mult            = MatMult_SeqBAIJ_N;
1694       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1695       break;
1696     }
1697   }
1698   b->bs      = bs;
1699   b->mbs     = mbs;
1700   b->nbs     = nbs;
1701   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1702   if (!nnz) {
1703     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1704     else if (nz <= 0)        nz = 1;
1705     for (i=0; i<mbs; i++) b->imax[i] = nz;
1706     nz = nz*mbs;
1707   } else {
1708     nz = 0;
1709     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1710   }
1711 
1712   /* allocate the matrix space */
1713   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1714   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1715   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1716   b->j  = (int*)(b->a + nz*bs2);
1717   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1718   b->i  = b->j + nz;
1719   b->singlemalloc = PETSC_TRUE;
1720 
1721   b->i[0] = 0;
1722   for (i=1; i<mbs+1; i++) {
1723     b->i[i] = b->i[i-1] + b->imax[i-1];
1724   }
1725 
1726   /* b->ilen will count nonzeros in each block row so far. */
1727   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1728   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1729   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1730 
1731   b->bs               = bs;
1732   b->bs2              = bs2;
1733   b->mbs              = mbs;
1734   b->nz               = 0;
1735   b->maxnz            = nz*bs2;
1736   B->info.nz_unneeded = (PetscReal)b->maxnz;
1737   PetscFunctionReturn(0);
1738 }
1739 EXTERN_C_END
1740 
1741 /*MC
1742    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
1743    block sparse compressed row format.
1744 
1745    Options Database Keys:
1746 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
1747 
1748   Level: beginner
1749 
1750 .seealso: MatCreateSeqBAIJ
1751 M*/
1752 
1753 EXTERN_C_BEGIN
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatCreate_SeqBAIJ"
1756 int MatCreate_SeqBAIJ(Mat B)
1757 {
1758   int         ierr,size;
1759   Mat_SeqBAIJ *b;
1760 
1761   PetscFunctionBegin;
1762   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1763   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1764 
1765   B->m = B->M = PetscMax(B->m,B->M);
1766   B->n = B->N = PetscMax(B->n,B->N);
1767   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1768   B->data = (void*)b;
1769   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1770   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1771   B->factor           = 0;
1772   B->lupivotthreshold = 1.0;
1773   B->mapping          = 0;
1774   b->row              = 0;
1775   b->col              = 0;
1776   b->icol             = 0;
1777   b->reallocs         = 0;
1778   b->saved_values     = 0;
1779 #if defined(PETSC_USE_MAT_SINGLE)
1780   b->setvalueslen     = 0;
1781   b->setvaluescopy    = PETSC_NULL;
1782 #endif
1783 
1784   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1785   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1786 
1787   b->sorted           = PETSC_FALSE;
1788   b->roworiented      = PETSC_TRUE;
1789   b->nonew            = 0;
1790   b->diag             = 0;
1791   b->solve_work       = 0;
1792   b->mult_work        = 0;
1793   B->spptr            = 0;
1794   B->info.nz_unneeded = (PetscReal)b->maxnz;
1795   b->keepzeroedrows   = PETSC_FALSE;
1796   b->xtoy              = 0;
1797   b->XtoY              = 0;
1798 
1799   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1800                                      "MatStoreValues_SeqBAIJ",
1801                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1802   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1803                                      "MatRetrieveValues_SeqBAIJ",
1804                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1805   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1806                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1807                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1808   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
1809                                      "MatConvert_SeqBAIJ_SeqAIJ",
1810                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1811   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
1812                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
1813                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
1814   PetscFunctionReturn(0);
1815 }
1816 EXTERN_C_END
1817 
1818 #undef __FUNCT__
1819 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1820 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1821 {
1822   Mat         C;
1823   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1824   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1825 
1826   PetscFunctionBegin;
1827   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1828 
1829   *B = 0;
1830   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1831   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1832   c    = (Mat_SeqBAIJ*)C->data;
1833 
1834   c->bs         = a->bs;
1835   c->bs2        = a->bs2;
1836   c->mbs        = a->mbs;
1837   c->nbs        = a->nbs;
1838   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1839 
1840   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1841   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1842   for (i=0; i<mbs; i++) {
1843     c->imax[i] = a->imax[i];
1844     c->ilen[i] = a->ilen[i];
1845   }
1846 
1847   /* allocate the matrix space */
1848   c->singlemalloc = PETSC_TRUE;
1849   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1850   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1851   c->j = (int*)(c->a + nz*bs2);
1852   c->i = c->j + nz;
1853   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1854   if (mbs > 0) {
1855     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1856     if (cpvalues == MAT_COPY_VALUES) {
1857       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1858     } else {
1859       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1860     }
1861   }
1862 
1863   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1864   c->sorted      = a->sorted;
1865   c->roworiented = a->roworiented;
1866   c->nonew       = a->nonew;
1867 
1868   if (a->diag) {
1869     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1870     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1871     for (i=0; i<mbs; i++) {
1872       c->diag[i] = a->diag[i];
1873     }
1874   } else c->diag        = 0;
1875   c->nz                 = a->nz;
1876   c->maxnz              = a->maxnz;
1877   c->solve_work         = 0;
1878   C->spptr              = 0;     /* Dangerous -I'm throwing away a->spptr */
1879   c->mult_work          = 0;
1880   C->preallocated       = PETSC_TRUE;
1881   C->assembled          = PETSC_TRUE;
1882   *B = C;
1883   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1884   PetscFunctionReturn(0);
1885 }
1886 
1887 #undef __FUNCT__
1888 #define __FUNCT__ "MatLoad_SeqBAIJ"
1889 int MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1890 {
1891   Mat_SeqBAIJ  *a;
1892   Mat          B;
1893   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1894   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1895   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1896   int          *masked,nmask,tmp,bs2,ishift;
1897   PetscScalar  *aa;
1898   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1899 
1900   PetscFunctionBegin;
1901   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1902   bs2  = bs*bs;
1903 
1904   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1905   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1906   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1907   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1908   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1909   M = header[1]; N = header[2]; nz = header[3];
1910 
1911   if (header[3] < 0) {
1912     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1913   }
1914 
1915   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1916 
1917   /*
1918      This code adds extra rows to make sure the number of rows is
1919     divisible by the blocksize
1920   */
1921   mbs        = M/bs;
1922   extra_rows = bs - M + bs*(mbs);
1923   if (extra_rows == bs) extra_rows = 0;
1924   else                  mbs++;
1925   if (extra_rows) {
1926     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1927   }
1928 
1929   /* read in row lengths */
1930   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1931   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1932   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1933 
1934   /* read in column indices */
1935   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1936   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1937   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1938 
1939   /* loop over row lengths determining block row lengths */
1940   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1941   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1942   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1943   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1944   masked   = mask + mbs;
1945   rowcount = 0; nzcount = 0;
1946   for (i=0; i<mbs; i++) {
1947     nmask = 0;
1948     for (j=0; j<bs; j++) {
1949       kmax = rowlengths[rowcount];
1950       for (k=0; k<kmax; k++) {
1951         tmp = jj[nzcount++]/bs;
1952         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1953       }
1954       rowcount++;
1955     }
1956     browlengths[i] += nmask;
1957     /* zero out the mask elements we set */
1958     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1959   }
1960 
1961   /* create our matrix */
1962   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
1963   ierr = MatSetType(B,type);CHKERRQ(ierr);
1964   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
1965   a = (Mat_SeqBAIJ*)B->data;
1966 
1967   /* set matrix "i" values */
1968   a->i[0] = 0;
1969   for (i=1; i<= mbs; i++) {
1970     a->i[i]      = a->i[i-1] + browlengths[i-1];
1971     a->ilen[i-1] = browlengths[i-1];
1972   }
1973   a->nz         = 0;
1974   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1975 
1976   /* read in nonzero values */
1977   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1978   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1979   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1980 
1981   /* set "a" and "j" values into matrix */
1982   nzcount = 0; jcount = 0;
1983   for (i=0; i<mbs; i++) {
1984     nzcountb = nzcount;
1985     nmask    = 0;
1986     for (j=0; j<bs; j++) {
1987       kmax = rowlengths[i*bs+j];
1988       for (k=0; k<kmax; k++) {
1989         tmp = jj[nzcount++]/bs;
1990 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1991       }
1992     }
1993     /* sort the masked values */
1994     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1995 
1996     /* set "j" values into matrix */
1997     maskcount = 1;
1998     for (j=0; j<nmask; j++) {
1999       a->j[jcount++]  = masked[j];
2000       mask[masked[j]] = maskcount++;
2001     }
2002     /* set "a" values into matrix */
2003     ishift = bs2*a->i[i];
2004     for (j=0; j<bs; j++) {
2005       kmax = rowlengths[i*bs+j];
2006       for (k=0; k<kmax; k++) {
2007         tmp       = jj[nzcountb]/bs ;
2008         block     = mask[tmp] - 1;
2009         point     = jj[nzcountb] - bs*tmp;
2010         idx       = ishift + bs2*block + j + bs*point;
2011         a->a[idx] = (MatScalar)aa[nzcountb++];
2012       }
2013     }
2014     /* zero out the mask elements we set */
2015     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2016   }
2017   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2018 
2019   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2020   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2021   ierr = PetscFree(aa);CHKERRQ(ierr);
2022   ierr = PetscFree(jj);CHKERRQ(ierr);
2023   ierr = PetscFree(mask);CHKERRQ(ierr);
2024 
2025   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2026   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2027   ierr = MatView_Private(B);CHKERRQ(ierr);
2028 
2029   *A = B;
2030   PetscFunctionReturn(0);
2031 }
2032 
2033 #undef __FUNCT__
2034 #define __FUNCT__ "MatCreateSeqBAIJ"
2035 /*@C
2036    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2037    compressed row) format.  For good matrix assembly performance the
2038    user should preallocate the matrix storage by setting the parameter nz
2039    (or the array nnz).  By setting these parameters accurately, performance
2040    during matrix assembly can be increased by more than a factor of 50.
2041 
2042    Collective on MPI_Comm
2043 
2044    Input Parameters:
2045 +  comm - MPI communicator, set to PETSC_COMM_SELF
2046 .  bs - size of block
2047 .  m - number of rows
2048 .  n - number of columns
2049 .  nz - number of nonzero blocks  per block row (same for all rows)
2050 -  nnz - array containing the number of nonzero blocks in the various block rows
2051          (possibly different for each block row) or PETSC_NULL
2052 
2053    Output Parameter:
2054 .  A - the matrix
2055 
2056    Options Database Keys:
2057 .   -mat_no_unroll - uses code that does not unroll the loops in the
2058                      block calculations (much slower)
2059 .    -mat_block_size - size of the blocks to use
2060 
2061    Level: intermediate
2062 
2063    Notes:
2064    A nonzero block is any block that as 1 or more nonzeros in it
2065 
2066    The block AIJ format is fully compatible with standard Fortran 77
2067    storage.  That is, the stored row and column indices can begin at
2068    either one (as in Fortran) or zero.  See the users' manual for details.
2069 
2070    Specify the preallocated storage with either nz or nnz (not both).
2071    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2072    allocation.  For additional details, see the users manual chapter on
2073    matrices.
2074 
2075 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2076 @*/
2077 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
2078 {
2079   int ierr;
2080 
2081   PetscFunctionBegin;
2082   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2083   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2084   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2085   PetscFunctionReturn(0);
2086 }
2087 
2088 #undef __FUNCT__
2089 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2090 /*@C
2091    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2092    per row in the matrix. For good matrix assembly performance the
2093    user should preallocate the matrix storage by setting the parameter nz
2094    (or the array nnz).  By setting these parameters accurately, performance
2095    during matrix assembly can be increased by more than a factor of 50.
2096 
2097    Collective on MPI_Comm
2098 
2099    Input Parameters:
2100 +  A - the matrix
2101 .  bs - size of block
2102 .  nz - number of block nonzeros per block row (same for all rows)
2103 -  nnz - array containing the number of block nonzeros in the various block rows
2104          (possibly different for each block row) or PETSC_NULL
2105 
2106    Options Database Keys:
2107 .   -mat_no_unroll - uses code that does not unroll the loops in the
2108                      block calculations (much slower)
2109 .    -mat_block_size - size of the blocks to use
2110 
2111    Level: intermediate
2112 
2113    Notes:
2114    The block AIJ format is fully compatible with standard Fortran 77
2115    storage.  That is, the stored row and column indices can begin at
2116    either one (as in Fortran) or zero.  See the users' manual for details.
2117 
2118    Specify the preallocated storage with either nz or nnz (not both).
2119    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2120    allocation.  For additional details, see the users manual chapter on
2121    matrices.
2122 
2123 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2124 @*/
2125 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[])
2126 {
2127   int ierr,(*f)(Mat,int,int,const int[]);
2128 
2129   PetscFunctionBegin;
2130   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2131   if (f) {
2132     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2133   }
2134   PetscFunctionReturn(0);
2135 }
2136