xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 4cb7fc73c5648a2ebbfd9f6fbb092565e0a4b3bb)
1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 /*
12     Special version for Fun3d sequential benchmark
13 */
14 #if defined(PETSC_HAVE_FORTRAN_CAPS)
15 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
16 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
17 #define matsetvaluesblocked4_ matsetvaluesblocked4
18 #endif
19 
20 EXTERN_C_BEGIN
21 #undef __FUNCT__
22 #define __FUNCT__ "matsetvaluesblocked4_"
23 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[])
24 {
25   Mat               A = *AA;
26   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
27   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
28   int               *ai=a->i,*ailen=a->ilen;
29   int               *aj=a->j,stepval;
30   const PetscScalar *value = v;
31   MatScalar         *ap,*aa = a->a,*bap;
32 
33   PetscFunctionBegin;
34   stepval = (n-1)*4;
35   for (k=0; k<m; k++) { /* loop over added rows */
36     row  = im[k];
37     rp   = aj + ai[row];
38     ap   = aa + 16*ai[row];
39     nrow = ailen[row];
40     low  = 0;
41     for (l=0; l<n; l++) { /* loop over added columns */
42       col = in[l];
43       value = v + k*(stepval+4)*4 + l*4;
44       low = 0; high = nrow;
45       while (high-low > 7) {
46         t = (low+high)/2;
47         if (rp[t] > col) high = t;
48         else             low  = t;
49       }
50       for (i=low; i<high; i++) {
51         if (rp[i] > col) break;
52         if (rp[i] == col) {
53           bap  = ap +  16*i;
54           for (ii=0; ii<4; ii++,value+=stepval) {
55             for (jj=ii; jj<16; jj+=4) {
56               bap[jj] += *value++;
57             }
58           }
59           goto noinsert2;
60         }
61       }
62       N = nrow++ - 1;
63       /* shift up all the later entries in this row */
64       for (ii=N; ii>=i; ii--) {
65         rp[ii+1] = rp[ii];
66         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
67       }
68       if (N >= i) {
69         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
70       }
71       rp[i] = col;
72       bap   = ap +  16*i;
73       for (ii=0; ii<4; ii++,value+=stepval) {
74         for (jj=ii; jj<16; jj+=4) {
75           bap[jj] = *value++;
76         }
77       }
78       noinsert2:;
79       low = i;
80     }
81     ailen[row] = nrow;
82   }
83 }
84 EXTERN_C_END
85 
86 #if defined(PETSC_HAVE_FORTRAN_CAPS)
87 #define matsetvalues4_ MATSETVALUES4
88 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
89 #define matsetvalues4_ matsetvalues4
90 #endif
91 
92 EXTERN_C_BEGIN
93 #undef __FUNCT__
94 #define __FUNCT__ "MatSetValues4_"
95 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
96 {
97   Mat         A = *AA;
98   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
99   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
100   int         *ai=a->i,*ailen=a->ilen;
101   int         *aj=a->j,brow,bcol;
102   int         ridx,cidx;
103   MatScalar   *ap,value,*aa=a->a,*bap;
104 
105   PetscFunctionBegin;
106   for (k=0; k<m; k++) { /* loop over added rows */
107     row  = im[k]; brow = row/4;
108     rp   = aj + ai[brow];
109     ap   = aa + 16*ai[brow];
110     nrow = ailen[brow];
111     low  = 0;
112     for (l=0; l<n; l++) { /* loop over added columns */
113       col = in[l]; bcol = col/4;
114       ridx = row % 4; cidx = col % 4;
115       value = v[l + k*n];
116       low = 0; high = nrow;
117       while (high-low > 7) {
118         t = (low+high)/2;
119         if (rp[t] > bcol) high = t;
120         else              low  = t;
121       }
122       for (i=low; i<high; i++) {
123         if (rp[i] > bcol) break;
124         if (rp[i] == bcol) {
125           bap  = ap +  16*i + 4*cidx + ridx;
126           *bap += value;
127           goto noinsert1;
128         }
129       }
130       N = nrow++ - 1;
131       /* shift up all the later entries in this row */
132       for (ii=N; ii>=i; ii--) {
133         rp[ii+1] = rp[ii];
134         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
135       }
136       if (N>=i) {
137         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
138       }
139       rp[i]                    = bcol;
140       ap[16*i + 4*cidx + ridx] = value;
141       noinsert1:;
142       low = i;
143     }
144     ailen[brow] = nrow;
145   }
146 }
147 EXTERN_C_END
148 
149 /*  UGLY, ugly, ugly
150    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
151    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
152    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
153    converts the entries into single precision and then calls ..._MatScalar() to put them
154    into the single precision data structures.
155 */
156 #if defined(PETSC_USE_MAT_SINGLE)
157 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
158 #else
159 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
160 #endif
161 
162 #define CHUNKSIZE  10
163 
164 /*
165      Checks for missing diagonals
166 */
167 #undef __FUNCT__
168 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
169 int MatMissingDiagonal_SeqBAIJ(Mat A)
170 {
171   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
172   int         *diag,*jj = a->j,i,ierr;
173 
174   PetscFunctionBegin;
175   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
176   diag = a->diag;
177   for (i=0; i<a->mbs; i++) {
178     if (jj[diag[i]] != i) {
179       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
180     }
181   }
182   PetscFunctionReturn(0);
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
187 int MatMarkDiagonal_SeqBAIJ(Mat A)
188 {
189   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
190   int         i,j,*diag,m = a->mbs,ierr;
191 
192   PetscFunctionBegin;
193   if (a->diag) PetscFunctionReturn(0);
194 
195   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
196   PetscLogObjectMemory(A,(m+1)*sizeof(int));
197   for (i=0; i<m; i++) {
198     diag[i] = a->i[i+1];
199     for (j=a->i[i]; j<a->i[i+1]; j++) {
200       if (a->j[j] == i) {
201         diag[i] = j;
202         break;
203       }
204     }
205   }
206   a->diag = diag;
207   PetscFunctionReturn(0);
208 }
209 
210 
211 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
212 
213 #undef __FUNCT__
214 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
215 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
216 {
217   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
218   int         ierr,n = a->mbs,i;
219 
220   PetscFunctionBegin;
221   *nn = n;
222   if (!ia) PetscFunctionReturn(0);
223   if (symmetric) {
224     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
225   } else if (oshift == 1) {
226     /* temporarily add 1 to i and j indices */
227     int nz = a->i[n];
228     for (i=0; i<nz; i++) a->j[i]++;
229     for (i=0; i<n+1; i++) a->i[i]++;
230     *ia = a->i; *ja = a->j;
231   } else {
232     *ia = a->i; *ja = a->j;
233   }
234 
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
240 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
241 {
242   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
243   int         i,n = a->mbs,ierr;
244 
245   PetscFunctionBegin;
246   if (!ia) PetscFunctionReturn(0);
247   if (symmetric) {
248     ierr = PetscFree(*ia);CHKERRQ(ierr);
249     ierr = PetscFree(*ja);CHKERRQ(ierr);
250   } else if (oshift == 1) {
251     int nz = a->i[n]-1;
252     for (i=0; i<nz; i++) a->j[i]--;
253     for (i=0; i<n+1; i++) a->i[i]--;
254   }
255   PetscFunctionReturn(0);
256 }
257 
258 #undef __FUNCT__
259 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
260 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
261 {
262   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
263 
264   PetscFunctionBegin;
265   *bs = baij->bs;
266   PetscFunctionReturn(0);
267 }
268 
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatDestroy_SeqBAIJ"
272 int MatDestroy_SeqBAIJ(Mat A)
273 {
274   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
275   int         ierr;
276 
277   PetscFunctionBegin;
278 #if defined(PETSC_USE_LOG)
279   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
280 #endif
281   ierr = PetscFree(a->a);CHKERRQ(ierr);
282   if (!a->singlemalloc) {
283     ierr = PetscFree(a->i);CHKERRQ(ierr);
284     ierr = PetscFree(a->j);CHKERRQ(ierr);
285   }
286   if (a->row) {
287     ierr = ISDestroy(a->row);CHKERRQ(ierr);
288   }
289   if (a->col) {
290     ierr = ISDestroy(a->col);CHKERRQ(ierr);
291   }
292   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
293   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
294   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
295   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
296   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
297   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
298   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
299   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
300 #if defined(PETSC_USE_MAT_SINGLE)
301   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
302 #endif
303   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
304 
305   ierr = PetscFree(a);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatSetOption_SeqBAIJ"
311 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
312 {
313   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
314 
315   PetscFunctionBegin;
316   switch (op) {
317   case MAT_ROW_ORIENTED:
318     a->roworiented    = PETSC_TRUE;
319     break;
320   case MAT_COLUMN_ORIENTED:
321     a->roworiented    = PETSC_FALSE;
322     break;
323   case MAT_COLUMNS_SORTED:
324     a->sorted         = PETSC_TRUE;
325     break;
326   case MAT_COLUMNS_UNSORTED:
327     a->sorted         = PETSC_FALSE;
328     break;
329   case MAT_KEEP_ZEROED_ROWS:
330     a->keepzeroedrows = PETSC_TRUE;
331     break;
332   case MAT_NO_NEW_NONZERO_LOCATIONS:
333     a->nonew          = 1;
334     break;
335   case MAT_NEW_NONZERO_LOCATION_ERR:
336     a->nonew          = -1;
337     break;
338   case MAT_NEW_NONZERO_ALLOCATION_ERR:
339     a->nonew          = -2;
340     break;
341   case MAT_YES_NEW_NONZERO_LOCATIONS:
342     a->nonew          = 0;
343     break;
344   case MAT_ROWS_SORTED:
345   case MAT_ROWS_UNSORTED:
346   case MAT_YES_NEW_DIAGONALS:
347   case MAT_IGNORE_OFF_PROC_ENTRIES:
348   case MAT_USE_HASH_TABLE:
349     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
350     break;
351   case MAT_NO_NEW_DIAGONALS:
352     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
353   case MAT_SYMMETRIC:
354   case MAT_STRUCTURALLY_SYMMETRIC:
355   case MAT_NOT_SYMMETRIC:
356   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
357   case MAT_HERMITIAN:
358   case MAT_NOT_HERMITIAN:
359   case MAT_SYMMETRY_ETERNAL:
360   case MAT_NOT_SYMMETRY_ETERNAL:
361     break;
362   default:
363     SETERRQ(PETSC_ERR_SUP,"unknown option");
364   }
365   PetscFunctionReturn(0);
366 }
367 
368 #undef __FUNCT__
369 #define __FUNCT__ "MatGetRow_SeqBAIJ"
370 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
371 {
372   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
373   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
374   MatScalar    *aa,*aa_i;
375   PetscScalar  *v_i;
376 
377   PetscFunctionBegin;
378   bs  = a->bs;
379   ai  = a->i;
380   aj  = a->j;
381   aa  = a->a;
382   bs2 = a->bs2;
383 
384   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row);
385 
386   bn  = row/bs;   /* Block number */
387   bp  = row % bs; /* Block Position */
388   M   = ai[bn+1] - ai[bn];
389   *nz = bs*M;
390 
391   if (v) {
392     *v = 0;
393     if (*nz) {
394       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
395       for (i=0; i<M; i++) { /* for each block in the block row */
396         v_i  = *v + i*bs;
397         aa_i = aa + bs2*(ai[bn] + i);
398         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
399       }
400     }
401   }
402 
403   if (idx) {
404     *idx = 0;
405     if (*nz) {
406       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
407       for (i=0; i<M; i++) { /* for each block in the block row */
408         idx_i = *idx + i*bs;
409         itmp  = bs*aj[ai[bn] + i];
410         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
411       }
412     }
413   }
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNCT__
418 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
419 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
420 {
421   int ierr;
422 
423   PetscFunctionBegin;
424   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
425   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatTranspose_SeqBAIJ"
431 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
432 {
433   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
434   Mat         C;
435   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
436   int         *rows,*cols,bs2=a->bs2;
437   PetscScalar *array;
438 
439   PetscFunctionBegin;
440   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
441   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
442   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
443 
444 #if defined(PETSC_USE_MAT_SINGLE)
445   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
446   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
447 #else
448   array = a->a;
449 #endif
450 
451   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
452   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
453   ierr = PetscFree(col);CHKERRQ(ierr);
454   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
455   cols = rows + bs;
456   for (i=0; i<mbs; i++) {
457     cols[0] = i*bs;
458     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
459     len = ai[i+1] - ai[i];
460     for (j=0; j<len; j++) {
461       rows[0] = (*aj++)*bs;
462       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
463       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
464       array += bs2;
465     }
466   }
467   ierr = PetscFree(rows);CHKERRQ(ierr);
468 #if defined(PETSC_USE_MAT_SINGLE)
469   ierr = PetscFree(array);
470 #endif
471 
472   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
473   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
474 
475   if (B) {
476     *B = C;
477   } else {
478     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
479   }
480   PetscFunctionReturn(0);
481 }
482 
483 #undef __FUNCT__
484 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
485 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
486 {
487   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
488   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
489   PetscScalar *aa;
490   FILE        *file;
491 
492   PetscFunctionBegin;
493   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
494   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
495   col_lens[0] = MAT_FILE_COOKIE;
496 
497   col_lens[1] = A->m;
498   col_lens[2] = A->n;
499   col_lens[3] = a->nz*bs2;
500 
501   /* store lengths of each row and write (including header) to file */
502   count = 0;
503   for (i=0; i<a->mbs; i++) {
504     for (j=0; j<bs; j++) {
505       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
506     }
507   }
508   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
509   ierr = PetscFree(col_lens);CHKERRQ(ierr);
510 
511   /* store column indices (zero start index) */
512   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
513   count = 0;
514   for (i=0; i<a->mbs; i++) {
515     for (j=0; j<bs; j++) {
516       for (k=a->i[i]; k<a->i[i+1]; k++) {
517         for (l=0; l<bs; l++) {
518           jj[count++] = bs*a->j[k] + l;
519         }
520       }
521     }
522   }
523   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
524   ierr = PetscFree(jj);CHKERRQ(ierr);
525 
526   /* store nonzero values */
527   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
528   count = 0;
529   for (i=0; i<a->mbs; i++) {
530     for (j=0; j<bs; j++) {
531       for (k=a->i[i]; k<a->i[i+1]; k++) {
532         for (l=0; l<bs; l++) {
533           aa[count++] = a->a[bs2*k + l*bs + j];
534         }
535       }
536     }
537   }
538   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
539   ierr = PetscFree(aa);CHKERRQ(ierr);
540 
541   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
542   if (file) {
543     fprintf(file,"-matload_block_size %d\n",a->bs);
544   }
545   PetscFunctionReturn(0);
546 }
547 
548 #undef __FUNCT__
549 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
550 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
551 {
552   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
553   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
554   PetscViewerFormat format;
555 
556   PetscFunctionBegin;
557   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
558   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
559     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
560   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
561     Mat aij;
562     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
563     ierr = MatView(aij,viewer);CHKERRQ(ierr);
564     ierr = MatDestroy(aij);CHKERRQ(ierr);
565   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
566      PetscFunctionReturn(0);
567   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
568     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
569     for (i=0; i<a->mbs; i++) {
570       for (j=0; j<bs; j++) {
571         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
572         for (k=a->i[i]; k<a->i[i+1]; k++) {
573           for (l=0; l<bs; l++) {
574 #if defined(PETSC_USE_COMPLEX)
575             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
576               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
577                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
578             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
579               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
580                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
581             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
582               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
583             }
584 #else
585             if (a->a[bs2*k + l*bs + j] != 0.0) {
586               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
587             }
588 #endif
589           }
590         }
591         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
592       }
593     }
594     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
595   } else {
596     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
597     for (i=0; i<a->mbs; i++) {
598       for (j=0; j<bs; j++) {
599         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
600         for (k=a->i[i]; k<a->i[i+1]; k++) {
601           for (l=0; l<bs; l++) {
602 #if defined(PETSC_USE_COMPLEX)
603             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
604               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
605                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
606             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
607               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
608                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
609             } else {
610               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
611             }
612 #else
613             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
614 #endif
615           }
616         }
617         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
618       }
619     }
620     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
621   }
622   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
628 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
629 {
630   Mat          A = (Mat) Aa;
631   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
632   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
633   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
634   MatScalar    *aa;
635   PetscViewer  viewer;
636 
637   PetscFunctionBegin;
638 
639   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
640   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
641 
642   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
643 
644   /* loop over matrix elements drawing boxes */
645   color = PETSC_DRAW_BLUE;
646   for (i=0,row=0; i<mbs; i++,row+=bs) {
647     for (j=a->i[i]; j<a->i[i+1]; j++) {
648       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
649       x_l = a->j[j]*bs; x_r = x_l + 1.0;
650       aa = a->a + j*bs2;
651       for (k=0; k<bs; k++) {
652         for (l=0; l<bs; l++) {
653           if (PetscRealPart(*aa++) >=  0.) continue;
654           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
655         }
656       }
657     }
658   }
659   color = PETSC_DRAW_CYAN;
660   for (i=0,row=0; i<mbs; i++,row+=bs) {
661     for (j=a->i[i]; j<a->i[i+1]; j++) {
662       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
663       x_l = a->j[j]*bs; x_r = x_l + 1.0;
664       aa = a->a + j*bs2;
665       for (k=0; k<bs; k++) {
666         for (l=0; l<bs; l++) {
667           if (PetscRealPart(*aa++) != 0.) continue;
668           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
669         }
670       }
671     }
672   }
673 
674   color = PETSC_DRAW_RED;
675   for (i=0,row=0; i<mbs; i++,row+=bs) {
676     for (j=a->i[i]; j<a->i[i+1]; j++) {
677       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
678       x_l = a->j[j]*bs; x_r = x_l + 1.0;
679       aa = a->a + j*bs2;
680       for (k=0; k<bs; k++) {
681         for (l=0; l<bs; l++) {
682           if (PetscRealPart(*aa++) <= 0.) continue;
683           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
684         }
685       }
686     }
687   }
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
693 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
694 {
695   int          ierr;
696   PetscReal    xl,yl,xr,yr,w,h;
697   PetscDraw    draw;
698   PetscTruth   isnull;
699 
700   PetscFunctionBegin;
701 
702   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
703   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
704 
705   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
706   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
707   xr += w;    yr += h;  xl = -w;     yl = -h;
708   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
709   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
710   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
711   PetscFunctionReturn(0);
712 }
713 
714 #undef __FUNCT__
715 #define __FUNCT__ "MatView_SeqBAIJ"
716 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
717 {
718   int        ierr;
719   PetscTruth isascii,isbinary,isdraw;
720 
721   PetscFunctionBegin;
722   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
723   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
724   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
725   if (isascii){
726     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
727   } else if (isbinary) {
728     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
729   } else if (isdraw) {
730     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
731   } else {
732     Mat B;
733     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
734     ierr = MatView(B,viewer);CHKERRQ(ierr);
735     ierr = MatDestroy(B);CHKERRQ(ierr);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "MatGetValues_SeqBAIJ"
743 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
744 {
745   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
746   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
747   int        *ai = a->i,*ailen = a->ilen;
748   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
749   MatScalar  *ap,*aa = a->a,zero = 0.0;
750 
751   PetscFunctionBegin;
752   for (k=0; k<m; k++) { /* loop over rows */
753     row  = im[k]; brow = row/bs;
754     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
755     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row);
756     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
757     nrow = ailen[brow];
758     for (l=0; l<n; l++) { /* loop over columns */
759       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
760       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]);
761       col  = in[l] ;
762       bcol = col/bs;
763       cidx = col%bs;
764       ridx = row%bs;
765       high = nrow;
766       low  = 0; /* assume unsorted */
767       while (high-low > 5) {
768         t = (low+high)/2;
769         if (rp[t] > bcol) high = t;
770         else             low  = t;
771       }
772       for (i=low; i<high; i++) {
773         if (rp[i] > bcol) break;
774         if (rp[i] == bcol) {
775           *v++ = ap[bs2*i+bs*cidx+ridx];
776           goto finished;
777         }
778       }
779       *v++ = zero;
780       finished:;
781     }
782   }
783   PetscFunctionReturn(0);
784 }
785 
786 #if defined(PETSC_USE_MAT_SINGLE)
787 #undef __FUNCT__
788 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
789 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
790 {
791   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
792   int         ierr,i,N = m*n*b->bs2;
793   MatScalar   *vsingle;
794 
795   PetscFunctionBegin;
796   if (N > b->setvalueslen) {
797     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
798     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
799     b->setvalueslen  = N;
800   }
801   vsingle = b->setvaluescopy;
802   for (i=0; i<N; i++) {
803     vsingle[i] = v[i];
804   }
805   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
806   PetscFunctionReturn(0);
807 }
808 #endif
809 
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
813 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is)
814 {
815   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
816   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
817   int               *imax=a->imax,*ai=a->i,*ailen=a->ilen;
818   int               *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
819   PetscTruth        roworiented=a->roworiented;
820   const MatScalar   *value = v;
821   MatScalar         *ap,*aa = a->a,*bap;
822 
823   PetscFunctionBegin;
824   if (roworiented) {
825     stepval = (n-1)*bs;
826   } else {
827     stepval = (m-1)*bs;
828   }
829   for (k=0; k<m; k++) { /* loop over added rows */
830     row  = im[k];
831     if (row < 0) continue;
832 #if defined(PETSC_USE_BOPT_g)
833     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
834 #endif
835     rp   = aj + ai[row];
836     ap   = aa + bs2*ai[row];
837     rmax = imax[row];
838     nrow = ailen[row];
839     low  = 0;
840     for (l=0; l<n; l++) { /* loop over added columns */
841       if (in[l] < 0) continue;
842 #if defined(PETSC_USE_BOPT_g)
843       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1);
844 #endif
845       col = in[l];
846       if (roworiented) {
847         value = v + k*(stepval+bs)*bs + l*bs;
848       } else {
849         value = v + l*(stepval+bs)*bs + k*bs;
850       }
851       if (!sorted) low = 0; high = nrow;
852       while (high-low > 7) {
853         t = (low+high)/2;
854         if (rp[t] > col) high = t;
855         else             low  = t;
856       }
857       for (i=low; i<high; i++) {
858         if (rp[i] > col) break;
859         if (rp[i] == col) {
860           bap  = ap +  bs2*i;
861           if (roworiented) {
862             if (is == ADD_VALUES) {
863               for (ii=0; ii<bs; ii++,value+=stepval) {
864                 for (jj=ii; jj<bs2; jj+=bs) {
865                   bap[jj] += *value++;
866                 }
867               }
868             } else {
869               for (ii=0; ii<bs; ii++,value+=stepval) {
870                 for (jj=ii; jj<bs2; jj+=bs) {
871                   bap[jj] = *value++;
872                 }
873               }
874             }
875           } else {
876             if (is == ADD_VALUES) {
877               for (ii=0; ii<bs; ii++,value+=stepval) {
878                 for (jj=0; jj<bs; jj++) {
879                   *bap++ += *value++;
880                 }
881               }
882             } else {
883               for (ii=0; ii<bs; ii++,value+=stepval) {
884                 for (jj=0; jj<bs; jj++) {
885                   *bap++  = *value++;
886                 }
887               }
888             }
889           }
890           goto noinsert2;
891         }
892       }
893       if (nonew == 1) goto noinsert2;
894       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
895       if (nrow >= rmax) {
896         /* there is no extra room in row, therefore enlarge */
897         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
898         MatScalar *new_a;
899 
900         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
901 
902         /* malloc new storage space */
903         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
904 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
905         new_j   = (int*)(new_a + bs2*new_nz);
906         new_i   = new_j + new_nz;
907 
908         /* copy over old data into new slots */
909         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
910         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
911         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
912         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
913         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
914         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
915         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
916         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
917         /* free up old matrix storage */
918         ierr = PetscFree(a->a);CHKERRQ(ierr);
919         if (!a->singlemalloc) {
920           ierr = PetscFree(a->i);CHKERRQ(ierr);
921           ierr = PetscFree(a->j);CHKERRQ(ierr);
922         }
923         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
924         a->singlemalloc = PETSC_TRUE;
925 
926         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
927         rmax = imax[row] = imax[row] + CHUNKSIZE;
928         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
929         a->maxnz += bs2*CHUNKSIZE;
930         a->reallocs++;
931         a->nz++;
932       }
933       N = nrow++ - 1;
934       /* shift up all the later entries in this row */
935       for (ii=N; ii>=i; ii--) {
936         rp[ii+1] = rp[ii];
937         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
938       }
939       if (N >= i) {
940         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
941       }
942       rp[i] = col;
943       bap   = ap +  bs2*i;
944       if (roworiented) {
945         for (ii=0; ii<bs; ii++,value+=stepval) {
946           for (jj=ii; jj<bs2; jj+=bs) {
947             bap[jj] = *value++;
948           }
949         }
950       } else {
951         for (ii=0; ii<bs; ii++,value+=stepval) {
952           for (jj=0; jj<bs; jj++) {
953             *bap++  = *value++;
954           }
955         }
956       }
957       noinsert2:;
958       low = i;
959     }
960     ailen[row] = nrow;
961   }
962   PetscFunctionReturn(0);
963 }
964 
965 #undef __FUNCT__
966 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
967 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
968 {
969   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
970   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
971   int        m = A->m,*ip,N,*ailen = a->ilen;
972   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
973   MatScalar  *aa = a->a,*ap;
974 
975   PetscFunctionBegin;
976   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
977 
978   if (m) rmax = ailen[0];
979   for (i=1; i<mbs; i++) {
980     /* move each row back by the amount of empty slots (fshift) before it*/
981     fshift += imax[i-1] - ailen[i-1];
982     rmax   = PetscMax(rmax,ailen[i]);
983     if (fshift) {
984       ip = aj + ai[i]; ap = aa + bs2*ai[i];
985       N = ailen[i];
986       for (j=0; j<N; j++) {
987         ip[j-fshift] = ip[j];
988         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
989       }
990     }
991     ai[i] = ai[i-1] + ailen[i-1];
992   }
993   if (mbs) {
994     fshift += imax[mbs-1] - ailen[mbs-1];
995     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
996   }
997   /* reset ilen and imax for each row */
998   for (i=0; i<mbs; i++) {
999     ailen[i] = imax[i] = ai[i+1] - ai[i];
1000   }
1001   a->nz = ai[mbs];
1002 
1003   /* diagonals may have moved, so kill the diagonal pointers */
1004   if (fshift && a->diag) {
1005     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1006     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
1007     a->diag = 0;
1008   }
1009   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1010   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1011   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1012   a->reallocs          = 0;
1013   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1014 
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 
1019 
1020 /*
1021    This function returns an array of flags which indicate the locations of contiguous
1022    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1023    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1024    Assume: sizes should be long enough to hold all the values.
1025 */
1026 #undef __FUNCT__
1027 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1028 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1029 {
1030   int        i,j,k,row;
1031   PetscTruth flg;
1032 
1033   PetscFunctionBegin;
1034   for (i=0,j=0; i<n; j++) {
1035     row = idx[i];
1036     if (row%bs!=0) { /* Not the begining of a block */
1037       sizes[j] = 1;
1038       i++;
1039     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1040       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1041       i++;
1042     } else { /* Begining of the block, so check if the complete block exists */
1043       flg = PETSC_TRUE;
1044       for (k=1; k<bs; k++) {
1045         if (row+k != idx[i+k]) { /* break in the block */
1046           flg = PETSC_FALSE;
1047           break;
1048         }
1049       }
1050       if (flg == PETSC_TRUE) { /* No break in the bs */
1051         sizes[j] = bs;
1052         i+= bs;
1053       } else {
1054         sizes[j] = 1;
1055         i++;
1056       }
1057     }
1058   }
1059   *bs_max = j;
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 #undef __FUNCT__
1064 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1065 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1066 {
1067   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1068   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
1069   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1070   PetscScalar zero = 0.0;
1071   MatScalar   *aa;
1072 
1073   PetscFunctionBegin;
1074   /* Make a copy of the IS and  sort it */
1075   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1076   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1077 
1078   /* allocate memory for rows,sizes */
1079   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
1080   sizes = rows + is_n;
1081 
1082   /* copy IS values to rows, and sort them */
1083   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1084   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1085   if (baij->keepzeroedrows) {
1086     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1087     bs_max = is_n;
1088   } else {
1089     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1090   }
1091   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1092 
1093   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1094     row   = rows[j];
1095     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1096     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1097     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1098     if (sizes[i] == bs && !baij->keepzeroedrows) {
1099       if (diag) {
1100         if (baij->ilen[row/bs] > 0) {
1101           baij->ilen[row/bs]       = 1;
1102           baij->j[baij->i[row/bs]] = row/bs;
1103           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1104         }
1105         /* Now insert all the diagonal values for this bs */
1106         for (k=0; k<bs; k++) {
1107           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1108         }
1109       } else { /* (!diag) */
1110         baij->ilen[row/bs] = 0;
1111       } /* end (!diag) */
1112     } else { /* (sizes[i] != bs) */
1113 #if defined (PETSC_USE_DEBUG)
1114       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1115 #endif
1116       for (k=0; k<count; k++) {
1117         aa[0] =  zero;
1118         aa    += bs;
1119       }
1120       if (diag) {
1121         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1122       }
1123     }
1124   }
1125 
1126   ierr = PetscFree(rows);CHKERRQ(ierr);
1127   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 #undef __FUNCT__
1132 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1133 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
1134 {
1135   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1136   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1137   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1138   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1139   int         ridx,cidx,bs2=a->bs2,ierr;
1140   PetscTruth  roworiented=a->roworiented;
1141   MatScalar   *ap,value,*aa=a->a,*bap;
1142 
1143   PetscFunctionBegin;
1144   for (k=0; k<m; k++) { /* loop over added rows */
1145     row  = im[k]; brow = row/bs;
1146     if (row < 0) continue;
1147 #if defined(PETSC_USE_BOPT_g)
1148     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
1149 #endif
1150     rp   = aj + ai[brow];
1151     ap   = aa + bs2*ai[brow];
1152     rmax = imax[brow];
1153     nrow = ailen[brow];
1154     low  = 0;
1155     for (l=0; l<n; l++) { /* loop over added columns */
1156       if (in[l] < 0) continue;
1157 #if defined(PETSC_USE_BOPT_g)
1158       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
1159 #endif
1160       col = in[l]; bcol = col/bs;
1161       ridx = row % bs; cidx = col % bs;
1162       if (roworiented) {
1163         value = v[l + k*n];
1164       } else {
1165         value = v[k + l*m];
1166       }
1167       if (!sorted) low = 0; high = nrow;
1168       while (high-low > 7) {
1169         t = (low+high)/2;
1170         if (rp[t] > bcol) high = t;
1171         else              low  = t;
1172       }
1173       for (i=low; i<high; i++) {
1174         if (rp[i] > bcol) break;
1175         if (rp[i] == bcol) {
1176           bap  = ap +  bs2*i + bs*cidx + ridx;
1177           if (is == ADD_VALUES) *bap += value;
1178           else                  *bap  = value;
1179           goto noinsert1;
1180         }
1181       }
1182       if (nonew == 1) goto noinsert1;
1183       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1184       if (nrow >= rmax) {
1185         /* there is no extra room in row, therefore enlarge */
1186         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1187         MatScalar *new_a;
1188 
1189         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1190 
1191         /* Malloc new storage space */
1192         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1193 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1194         new_j   = (int*)(new_a + bs2*new_nz);
1195         new_i   = new_j + new_nz;
1196 
1197         /* copy over old data into new slots */
1198         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1199         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1200         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1201         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1202         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1203         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1204         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1205         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1206         /* free up old matrix storage */
1207         ierr = PetscFree(a->a);CHKERRQ(ierr);
1208         if (!a->singlemalloc) {
1209           ierr = PetscFree(a->i);CHKERRQ(ierr);
1210           ierr = PetscFree(a->j);CHKERRQ(ierr);
1211         }
1212         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1213         a->singlemalloc = PETSC_TRUE;
1214 
1215         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1216         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1217         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1218         a->maxnz += bs2*CHUNKSIZE;
1219         a->reallocs++;
1220         a->nz++;
1221       }
1222       N = nrow++ - 1;
1223       /* shift up all the later entries in this row */
1224       for (ii=N; ii>=i; ii--) {
1225         rp[ii+1] = rp[ii];
1226         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1227       }
1228       if (N>=i) {
1229         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1230       }
1231       rp[i]                      = bcol;
1232       ap[bs2*i + bs*cidx + ridx] = value;
1233       noinsert1:;
1234       low = i;
1235     }
1236     ailen[brow] = nrow;
1237   }
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1244 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1245 {
1246   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1247   Mat         outA;
1248   int         ierr;
1249   PetscTruth  row_identity,col_identity;
1250 
1251   PetscFunctionBegin;
1252   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1253   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1254   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1255   if (!row_identity || !col_identity) {
1256     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1257   }
1258 
1259   outA          = inA;
1260   inA->factor   = FACTOR_LU;
1261 
1262   if (!a->diag) {
1263     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1264   }
1265 
1266   a->row        = row;
1267   a->col        = col;
1268   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1269   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1270 
1271   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1272   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1273   PetscLogObjectParent(inA,a->icol);
1274 
1275   /*
1276       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1277       for ILU(0) factorization with natural ordering
1278   */
1279   if (a->bs < 8) {
1280     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1281   } else {
1282     if (!a->solve_work) {
1283       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1284       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1285     }
1286   }
1287 
1288   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1289 
1290   PetscFunctionReturn(0);
1291 }
1292 #undef __FUNCT__
1293 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1294 int MatPrintHelp_SeqBAIJ(Mat A)
1295 {
1296   static PetscTruth called = PETSC_FALSE;
1297   MPI_Comm          comm = A->comm;
1298   int               ierr;
1299 
1300   PetscFunctionBegin;
1301   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1302   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1303   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 EXTERN_C_BEGIN
1308 #undef __FUNCT__
1309 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1310 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1311 {
1312   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1313   int         i,nz,nbs;
1314 
1315   PetscFunctionBegin;
1316   nz  = baij->maxnz/baij->bs2;
1317   nbs = baij->nbs;
1318   for (i=0; i<nz; i++) {
1319     baij->j[i] = indices[i];
1320   }
1321   baij->nz = nz;
1322   for (i=0; i<nbs; i++) {
1323     baij->ilen[i] = baij->imax[i];
1324   }
1325 
1326   PetscFunctionReturn(0);
1327 }
1328 EXTERN_C_END
1329 
1330 #undef __FUNCT__
1331 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1332 /*@
1333     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1334        in the matrix.
1335 
1336   Input Parameters:
1337 +  mat - the SeqBAIJ matrix
1338 -  indices - the column indices
1339 
1340   Level: advanced
1341 
1342   Notes:
1343     This can be called if you have precomputed the nonzero structure of the
1344   matrix and want to provide it to the matrix object to improve the performance
1345   of the MatSetValues() operation.
1346 
1347     You MUST have set the correct numbers of nonzeros per row in the call to
1348   MatCreateSeqBAIJ().
1349 
1350     MUST be called before any calls to MatSetValues();
1351 
1352 @*/
1353 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1354 {
1355   int ierr,(*f)(Mat,int *);
1356 
1357   PetscFunctionBegin;
1358   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1359   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1360   if (f) {
1361     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1362   } else {
1363     SETERRQ(1,"Wrong type of matrix to set column indices");
1364   }
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #undef __FUNCT__
1369 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1370 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1371 {
1372   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1373   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1374   PetscReal    atmp;
1375   PetscScalar  *x,zero = 0.0;
1376   MatScalar    *aa;
1377   int          ncols,brow,krow,kcol;
1378 
1379   PetscFunctionBegin;
1380   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1381   bs   = a->bs;
1382   aa   = a->a;
1383   ai   = a->i;
1384   aj   = a->j;
1385   mbs = a->mbs;
1386 
1387   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1388   ierr = VecGetArrayFast(v,&x);CHKERRQ(ierr);
1389   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1390   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1391   for (i=0; i<mbs; i++) {
1392     ncols = ai[1] - ai[0]; ai++;
1393     brow  = bs*i;
1394     for (j=0; j<ncols; j++){
1395       /* bcol = bs*(*aj); */
1396       for (kcol=0; kcol<bs; kcol++){
1397         for (krow=0; krow<bs; krow++){
1398           atmp = PetscAbsScalar(*aa); aa++;
1399           row = brow + krow;    /* row index */
1400           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1401           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1402         }
1403       }
1404       aj++;
1405     }
1406   }
1407   ierr = VecRestoreArrayFast(v,&x);CHKERRQ(ierr);
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1413 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1414 {
1415   int        ierr;
1416 
1417   PetscFunctionBegin;
1418   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1424 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1425 {
1426   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1427   PetscFunctionBegin;
1428   *array = a->a;
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1434 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1435 {
1436   PetscFunctionBegin;
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #include "petscblaslapack.h"
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1443 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1444 {
1445   Mat_SeqBAIJ  *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1446   int          ierr,one=1,i,bs=y->bs,j,bs2;
1447 
1448   PetscFunctionBegin;
1449   if (str == SAME_NONZERO_PATTERN) {
1450     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1451   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1452     if (y->xtoy && y->XtoY != X) {
1453       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1454       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1455     }
1456     if (!y->xtoy) { /* get xtoy */
1457       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1458       y->XtoY = X;
1459     }
1460     bs2 = bs*bs;
1461     for (i=0; i<x->nz; i++) {
1462       j = 0;
1463       while (j < bs2){
1464         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1465         j++;
1466       }
1467     }
1468     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1469   } else {
1470     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1471   }
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 /* -------------------------------------------------------------------*/
1476 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1477        MatGetRow_SeqBAIJ,
1478        MatRestoreRow_SeqBAIJ,
1479        MatMult_SeqBAIJ_N,
1480 /* 4*/ MatMultAdd_SeqBAIJ_N,
1481        MatMultTranspose_SeqBAIJ,
1482        MatMultTransposeAdd_SeqBAIJ,
1483        MatSolve_SeqBAIJ_N,
1484        0,
1485        0,
1486 /*10*/ 0,
1487        MatLUFactor_SeqBAIJ,
1488        0,
1489        0,
1490        MatTranspose_SeqBAIJ,
1491 /*15*/ MatGetInfo_SeqBAIJ,
1492        MatEqual_SeqBAIJ,
1493        MatGetDiagonal_SeqBAIJ,
1494        MatDiagonalScale_SeqBAIJ,
1495        MatNorm_SeqBAIJ,
1496 /*20*/ 0,
1497        MatAssemblyEnd_SeqBAIJ,
1498        0,
1499        MatSetOption_SeqBAIJ,
1500        MatZeroEntries_SeqBAIJ,
1501 /*25*/ MatZeroRows_SeqBAIJ,
1502        MatLUFactorSymbolic_SeqBAIJ,
1503        MatLUFactorNumeric_SeqBAIJ_N,
1504        0,
1505        0,
1506 /*30*/ MatSetUpPreallocation_SeqBAIJ,
1507        MatILUFactorSymbolic_SeqBAIJ,
1508        0,
1509        MatGetArray_SeqBAIJ,
1510        MatRestoreArray_SeqBAIJ,
1511 /*35*/ MatDuplicate_SeqBAIJ,
1512        0,
1513        0,
1514        MatILUFactor_SeqBAIJ,
1515        0,
1516 /*40*/ MatAXPY_SeqBAIJ,
1517        MatGetSubMatrices_SeqBAIJ,
1518        MatIncreaseOverlap_SeqBAIJ,
1519        MatGetValues_SeqBAIJ,
1520        0,
1521 /*45*/ MatPrintHelp_SeqBAIJ,
1522        MatScale_SeqBAIJ,
1523        0,
1524        0,
1525        0,
1526 /*50*/ MatGetBlockSize_SeqBAIJ,
1527        MatGetRowIJ_SeqBAIJ,
1528        MatRestoreRowIJ_SeqBAIJ,
1529        0,
1530        0,
1531 /*55*/ 0,
1532        0,
1533        0,
1534        0,
1535        MatSetValuesBlocked_SeqBAIJ,
1536 /*60*/ MatGetSubMatrix_SeqBAIJ,
1537        MatDestroy_SeqBAIJ,
1538        MatView_SeqBAIJ,
1539        MatGetPetscMaps_Petsc,
1540        0,
1541 /*65*/ 0,
1542        0,
1543        0,
1544        0,
1545        0,
1546 /*70*/ MatGetRowMax_SeqBAIJ,
1547        MatConvert_Basic,
1548        0,
1549        0,
1550        0,
1551 /*75*/ 0,
1552        0,
1553        0,
1554        0,
1555        0,
1556 /*80*/ 0,
1557        0,
1558        0,
1559        0,
1560 /*85*/ MatLoad_SeqBAIJ
1561 };
1562 
1563 EXTERN_C_BEGIN
1564 #undef __FUNCT__
1565 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1566 int MatStoreValues_SeqBAIJ(Mat mat)
1567 {
1568   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1569   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1570   int         ierr;
1571 
1572   PetscFunctionBegin;
1573   if (aij->nonew != 1) {
1574     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1575   }
1576 
1577   /* allocate space for values if not already there */
1578   if (!aij->saved_values) {
1579     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1580   }
1581 
1582   /* copy values over */
1583   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1584   PetscFunctionReturn(0);
1585 }
1586 EXTERN_C_END
1587 
1588 EXTERN_C_BEGIN
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1591 int MatRetrieveValues_SeqBAIJ(Mat mat)
1592 {
1593   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1594   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1595 
1596   PetscFunctionBegin;
1597   if (aij->nonew != 1) {
1598     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1599   }
1600   if (!aij->saved_values) {
1601     SETERRQ(1,"Must call MatStoreValues(A);first");
1602   }
1603 
1604   /* copy values over */
1605   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1606   PetscFunctionReturn(0);
1607 }
1608 EXTERN_C_END
1609 
1610 EXTERN_C_BEGIN
1611 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*);
1612 extern int MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*);
1613 EXTERN_C_END
1614 
1615 EXTERN_C_BEGIN
1616 #undef __FUNCT__
1617 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
1618 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz)
1619 {
1620   Mat_SeqBAIJ *b;
1621   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1622   PetscTruth  flg;
1623 
1624   PetscFunctionBegin;
1625 
1626   B->preallocated = PETSC_TRUE;
1627   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1628   if (nnz && newbs != bs) {
1629     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1630   }
1631   bs   = newbs;
1632 
1633   mbs  = B->m/bs;
1634   nbs  = B->n/bs;
1635   bs2  = bs*bs;
1636 
1637   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1638     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1639   }
1640 
1641   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1642   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1643   if (nnz) {
1644     for (i=0; i<mbs; i++) {
1645       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1646       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1647     }
1648   }
1649 
1650   b       = (Mat_SeqBAIJ*)B->data;
1651   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1652   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1653   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1654   if (!flg) {
1655     switch (bs) {
1656     case 1:
1657       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1658       B->ops->mult            = MatMult_SeqBAIJ_1;
1659       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1660       break;
1661     case 2:
1662       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1663       B->ops->mult            = MatMult_SeqBAIJ_2;
1664       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1665       break;
1666     case 3:
1667       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1668       B->ops->mult            = MatMult_SeqBAIJ_3;
1669       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1670       break;
1671     case 4:
1672       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1673       B->ops->mult            = MatMult_SeqBAIJ_4;
1674       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1675       break;
1676     case 5:
1677       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1678       B->ops->mult            = MatMult_SeqBAIJ_5;
1679       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1680       break;
1681     case 6:
1682       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1683       B->ops->mult            = MatMult_SeqBAIJ_6;
1684       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1685       break;
1686     case 7:
1687       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1688       B->ops->mult            = MatMult_SeqBAIJ_7;
1689       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1690       break;
1691     default:
1692       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1693       B->ops->mult            = MatMult_SeqBAIJ_N;
1694       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1695       break;
1696     }
1697   }
1698   b->bs      = bs;
1699   b->mbs     = mbs;
1700   b->nbs     = nbs;
1701   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1702   if (!nnz) {
1703     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1704     else if (nz <= 0)        nz = 1;
1705     for (i=0; i<mbs; i++) b->imax[i] = nz;
1706     nz = nz*mbs;
1707   } else {
1708     nz = 0;
1709     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1710   }
1711 
1712   /* allocate the matrix space */
1713   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1714   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1715   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1716   b->j  = (int*)(b->a + nz*bs2);
1717   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1718   b->i  = b->j + nz;
1719   b->singlemalloc = PETSC_TRUE;
1720 
1721   b->i[0] = 0;
1722   for (i=1; i<mbs+1; i++) {
1723     b->i[i] = b->i[i-1] + b->imax[i-1];
1724   }
1725 
1726   /* b->ilen will count nonzeros in each block row so far. */
1727   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1728   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1729   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1730 
1731   b->bs               = bs;
1732   b->bs2              = bs2;
1733   b->mbs              = mbs;
1734   b->nz               = 0;
1735   b->maxnz            = nz*bs2;
1736   B->info.nz_unneeded = (PetscReal)b->maxnz;
1737   PetscFunctionReturn(0);
1738 }
1739 EXTERN_C_END
1740 
1741 /*MC
1742    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
1743    block sparse compressed row format.
1744 
1745    Options Database Keys:
1746 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
1747 
1748   Level: beginner
1749 
1750 .seealso: MatCreateSeqBAIJ
1751 M*/
1752 
1753 EXTERN_C_BEGIN
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatCreate_SeqBAIJ"
1756 int MatCreate_SeqBAIJ(Mat B)
1757 {
1758   int         ierr,size;
1759   Mat_SeqBAIJ *b;
1760 
1761   PetscFunctionBegin;
1762   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1763   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1764 
1765   B->m = B->M = PetscMax(B->m,B->M);
1766   B->n = B->N = PetscMax(B->n,B->N);
1767   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1768   B->data = (void*)b;
1769   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1770   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1771   B->factor           = 0;
1772   B->lupivotthreshold = 1.0;
1773   B->mapping          = 0;
1774   b->row              = 0;
1775   b->col              = 0;
1776   b->icol             = 0;
1777   b->reallocs         = 0;
1778   b->saved_values     = 0;
1779 #if defined(PETSC_USE_MAT_SINGLE)
1780   b->setvalueslen     = 0;
1781   b->setvaluescopy    = PETSC_NULL;
1782 #endif
1783 
1784   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1785   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1786 
1787   b->sorted           = PETSC_FALSE;
1788   b->roworiented      = PETSC_TRUE;
1789   b->nonew            = 0;
1790   b->diag             = 0;
1791   b->solve_work       = 0;
1792   b->mult_work        = 0;
1793   B->spptr            = 0;
1794   B->info.nz_unneeded = (PetscReal)b->maxnz;
1795   b->keepzeroedrows   = PETSC_FALSE;
1796   b->xtoy              = 0;
1797   b->XtoY              = 0;
1798 
1799   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1800                                      "MatStoreValues_SeqBAIJ",
1801                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1802   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1803                                      "MatRetrieveValues_SeqBAIJ",
1804                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1805   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1806                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1807                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1808   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
1809                                      "MatConvert_SeqBAIJ_SeqAIJ",
1810                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1811 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
1812                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
1813                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
1814   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
1815                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
1816                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
1817   PetscFunctionReturn(0);
1818 }
1819 EXTERN_C_END
1820 
1821 #undef __FUNCT__
1822 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1823 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1824 {
1825   Mat         C;
1826   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1827   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1828 
1829   PetscFunctionBegin;
1830   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1831 
1832   *B = 0;
1833   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1834   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1835   c    = (Mat_SeqBAIJ*)C->data;
1836 
1837   C->M   = A->M;
1838   C->N   = A->N;
1839   c->bs  = a->bs;
1840   c->bs2 = a->bs2;
1841   c->mbs = a->mbs;
1842   c->nbs = a->nbs;
1843   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1844 
1845   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1846   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1847   for (i=0; i<mbs; i++) {
1848     c->imax[i] = a->imax[i];
1849     c->ilen[i] = a->ilen[i];
1850   }
1851 
1852   /* allocate the matrix space */
1853   c->singlemalloc = PETSC_TRUE;
1854   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1855   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1856   c->j = (int*)(c->a + nz*bs2);
1857   c->i = c->j + nz;
1858   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1859   if (mbs > 0) {
1860     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1861     if (cpvalues == MAT_COPY_VALUES) {
1862       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1863     } else {
1864       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1865     }
1866   }
1867 
1868   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1869   c->sorted      = a->sorted;
1870   c->roworiented = a->roworiented;
1871   c->nonew       = a->nonew;
1872 
1873   if (a->diag) {
1874     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1875     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1876     for (i=0; i<mbs; i++) {
1877       c->diag[i] = a->diag[i];
1878     }
1879   } else c->diag        = 0;
1880   c->nz                 = a->nz;
1881   c->maxnz              = a->maxnz;
1882   c->solve_work         = 0;
1883   C->spptr              = 0;     /* Dangerous -I'm throwing away a->spptr */
1884   c->mult_work          = 0;
1885   C->preallocated       = PETSC_TRUE;
1886   C->assembled          = PETSC_TRUE;
1887   *B = C;
1888   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1889   PetscFunctionReturn(0);
1890 }
1891 
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatLoad_SeqBAIJ"
1894 int MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1895 {
1896   Mat_SeqBAIJ  *a;
1897   Mat          B;
1898   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1899   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1900   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1901   int          *masked,nmask,tmp,bs2,ishift;
1902   PetscScalar  *aa;
1903   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1904 
1905   PetscFunctionBegin;
1906   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1907   bs2  = bs*bs;
1908 
1909   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1910   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1911   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1912   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1913   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1914   M = header[1]; N = header[2]; nz = header[3];
1915 
1916   if (header[3] < 0) {
1917     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1918   }
1919 
1920   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1921 
1922   /*
1923      This code adds extra rows to make sure the number of rows is
1924     divisible by the blocksize
1925   */
1926   mbs        = M/bs;
1927   extra_rows = bs - M + bs*(mbs);
1928   if (extra_rows == bs) extra_rows = 0;
1929   else                  mbs++;
1930   if (extra_rows) {
1931     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1932   }
1933 
1934   /* read in row lengths */
1935   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1936   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1937   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1938 
1939   /* read in column indices */
1940   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1941   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1942   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1943 
1944   /* loop over row lengths determining block row lengths */
1945   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1946   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1947   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1948   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1949   masked   = mask + mbs;
1950   rowcount = 0; nzcount = 0;
1951   for (i=0; i<mbs; i++) {
1952     nmask = 0;
1953     for (j=0; j<bs; j++) {
1954       kmax = rowlengths[rowcount];
1955       for (k=0; k<kmax; k++) {
1956         tmp = jj[nzcount++]/bs;
1957         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1958       }
1959       rowcount++;
1960     }
1961     browlengths[i] += nmask;
1962     /* zero out the mask elements we set */
1963     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1964   }
1965 
1966   /* create our matrix */
1967   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
1968   ierr = MatSetType(B,type);CHKERRQ(ierr);
1969   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
1970   a = (Mat_SeqBAIJ*)B->data;
1971 
1972   /* set matrix "i" values */
1973   a->i[0] = 0;
1974   for (i=1; i<= mbs; i++) {
1975     a->i[i]      = a->i[i-1] + browlengths[i-1];
1976     a->ilen[i-1] = browlengths[i-1];
1977   }
1978   a->nz         = 0;
1979   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1980 
1981   /* read in nonzero values */
1982   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1983   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1984   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1985 
1986   /* set "a" and "j" values into matrix */
1987   nzcount = 0; jcount = 0;
1988   for (i=0; i<mbs; i++) {
1989     nzcountb = nzcount;
1990     nmask    = 0;
1991     for (j=0; j<bs; j++) {
1992       kmax = rowlengths[i*bs+j];
1993       for (k=0; k<kmax; k++) {
1994         tmp = jj[nzcount++]/bs;
1995 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1996       }
1997     }
1998     /* sort the masked values */
1999     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2000 
2001     /* set "j" values into matrix */
2002     maskcount = 1;
2003     for (j=0; j<nmask; j++) {
2004       a->j[jcount++]  = masked[j];
2005       mask[masked[j]] = maskcount++;
2006     }
2007     /* set "a" values into matrix */
2008     ishift = bs2*a->i[i];
2009     for (j=0; j<bs; j++) {
2010       kmax = rowlengths[i*bs+j];
2011       for (k=0; k<kmax; k++) {
2012         tmp       = jj[nzcountb]/bs ;
2013         block     = mask[tmp] - 1;
2014         point     = jj[nzcountb] - bs*tmp;
2015         idx       = ishift + bs2*block + j + bs*point;
2016         a->a[idx] = (MatScalar)aa[nzcountb++];
2017       }
2018     }
2019     /* zero out the mask elements we set */
2020     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2021   }
2022   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2023 
2024   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2025   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2026   ierr = PetscFree(aa);CHKERRQ(ierr);
2027   ierr = PetscFree(jj);CHKERRQ(ierr);
2028   ierr = PetscFree(mask);CHKERRQ(ierr);
2029 
2030   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2031   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2032   ierr = MatView_Private(B);CHKERRQ(ierr);
2033 
2034   *A = B;
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 #undef __FUNCT__
2039 #define __FUNCT__ "MatCreateSeqBAIJ"
2040 /*@C
2041    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2042    compressed row) format.  For good matrix assembly performance the
2043    user should preallocate the matrix storage by setting the parameter nz
2044    (or the array nnz).  By setting these parameters accurately, performance
2045    during matrix assembly can be increased by more than a factor of 50.
2046 
2047    Collective on MPI_Comm
2048 
2049    Input Parameters:
2050 +  comm - MPI communicator, set to PETSC_COMM_SELF
2051 .  bs - size of block
2052 .  m - number of rows
2053 .  n - number of columns
2054 .  nz - number of nonzero blocks  per block row (same for all rows)
2055 -  nnz - array containing the number of nonzero blocks in the various block rows
2056          (possibly different for each block row) or PETSC_NULL
2057 
2058    Output Parameter:
2059 .  A - the matrix
2060 
2061    Options Database Keys:
2062 .   -mat_no_unroll - uses code that does not unroll the loops in the
2063                      block calculations (much slower)
2064 .    -mat_block_size - size of the blocks to use
2065 
2066    Level: intermediate
2067 
2068    Notes:
2069    A nonzero block is any block that as 1 or more nonzeros in it
2070 
2071    The block AIJ format is fully compatible with standard Fortran 77
2072    storage.  That is, the stored row and column indices can begin at
2073    either one (as in Fortran) or zero.  See the users' manual for details.
2074 
2075    Specify the preallocated storage with either nz or nnz (not both).
2076    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2077    allocation.  For additional details, see the users manual chapter on
2078    matrices.
2079 
2080 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2081 @*/
2082 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
2083 {
2084   int ierr;
2085 
2086   PetscFunctionBegin;
2087   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2088   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2089   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2090   PetscFunctionReturn(0);
2091 }
2092 
2093 #undef __FUNCT__
2094 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2095 /*@C
2096    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2097    per row in the matrix. For good matrix assembly performance the
2098    user should preallocate the matrix storage by setting the parameter nz
2099    (or the array nnz).  By setting these parameters accurately, performance
2100    during matrix assembly can be increased by more than a factor of 50.
2101 
2102    Collective on MPI_Comm
2103 
2104    Input Parameters:
2105 +  A - the matrix
2106 .  bs - size of block
2107 .  nz - number of block nonzeros per block row (same for all rows)
2108 -  nnz - array containing the number of block nonzeros in the various block rows
2109          (possibly different for each block row) or PETSC_NULL
2110 
2111    Options Database Keys:
2112 .   -mat_no_unroll - uses code that does not unroll the loops in the
2113                      block calculations (much slower)
2114 .    -mat_block_size - size of the blocks to use
2115 
2116    Level: intermediate
2117 
2118    Notes:
2119    The block AIJ format is fully compatible with standard Fortran 77
2120    storage.  That is, the stored row and column indices can begin at
2121    either one (as in Fortran) or zero.  See the users' manual for details.
2122 
2123    Specify the preallocated storage with either nz or nnz (not both).
2124    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2125    allocation.  For additional details, see the users manual chapter on
2126    matrices.
2127 
2128 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2129 @*/
2130 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[])
2131 {
2132   int ierr,(*f)(Mat,int,int,const int[]);
2133 
2134   PetscFunctionBegin;
2135   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2136   if (f) {
2137     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2138   }
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 #include "src/inline/ilu.h"
2143 
2144 #undef __FUNCT__
2145 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
2146 int MatInvertBlockDiagonal_SeqBAIJ(Mat A)
2147 {
2148   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
2149   int         *diag_offset,i,bs = a->bs,mbs = a->mbs,ierr;
2150   PetscScalar *v = a->a,*odiag,*diag;
2151 
2152   PetscFunctionBegin;
2153   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
2154   diag_offset = a->diag;
2155   if (!a->idiag) {
2156     ierr = PetscMalloc(bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
2157   }
2158   diag = a->idiag;
2159   /* factor and invert each block */
2160   switch (a->bs){
2161     case 2:
2162       for (i=0; i<mbs; i++) {
2163         odiag   = v + 4*diag_offset[i];
2164         diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
2165 	ierr    = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
2166 	diag   += 4;
2167       }
2168       break;
2169     case 3:
2170       for (i=0; i<mbs; i++) {
2171   CHKMEMQ;
2172         odiag   = v + 9*diag_offset[i];
2173         diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
2174         diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
2175         diag[8] = odiag[8];
2176 	ierr    = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
2177 	diag   += 9;
2178   CHKMEMQ;
2179       }
2180       break;
2181     case 4:
2182       for (i=0; i<mbs; i++) {
2183         odiag = v + 16*diag_offset[i];
2184         ierr  = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
2185 	ierr  = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
2186 	diag += 16;
2187       }
2188       break;
2189     case 5:
2190       for (i=0; i<mbs; i++) {
2191         odiag = v + 25*diag_offset[i];
2192         ierr  = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
2193 	ierr  = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
2194 	diag += 25;
2195       }
2196       break;
2197     default:
2198       SETERRQ1(1,"not supported for block size %d",a->bs);
2199   }
2200   CHKMEMQ;
2201   PetscFunctionReturn(0);
2202 }
2203 
2204