xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 5a655dc69978bfa5d89a399bdbc880d673ee91a3)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: baij.c,v 1.166 1999/03/04 22:35:37 bsmith Exp bsmith $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "sys.h"
10 #include "src/mat/impls/baij/seq/baij.h"
11 #include "src/vec/vecimpl.h"
12 #include "src/inline/spops.h"
13 
14 #define CHUNKSIZE  10
15 
16 /*
17      Checks for missing diagonals
18 */
19 #undef __FUNC__
20 #define __FUNC__ "MatMissingDiag_SeqBAIJ"
21 int MatMissingDiag_SeqBAIJ(Mat A)
22 {
23   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
24   int         *diag = a->diag, *jj = a->j,i;
25 
26   PetscFunctionBegin;
27   for ( i=0; i<a->mbs; i++ ) {
28     if (jj[diag[i]] != i) {
29       SETERRQ1(1,1,"Matrix is missing diagonal number %d",i);
30     }
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 #undef __FUNC__
36 #define __FUNC__ "MatMarkDiag_SeqBAIJ"
37 int MatMarkDiag_SeqBAIJ(Mat A)
38 {
39   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
40   int         i,j, *diag, m = a->mbs;
41 
42   PetscFunctionBegin;
43   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
44   PLogObjectMemory(A,(m+1)*sizeof(int));
45   for ( i=0; i<m; i++ ) {
46     diag[i] = a->i[i+1];
47     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
48       if (a->j[j] == i) {
49         diag[i] = j;
50         break;
51       }
52     }
53   }
54   a->diag = diag;
55   PetscFunctionReturn(0);
56 }
57 
58 
59 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
60 
61 #undef __FUNC__
62 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
63 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
64                             PetscTruth *done)
65 {
66   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
67   int         ierr, n = a->mbs,i;
68 
69   PetscFunctionBegin;
70   *nn = n;
71   if (!ia) PetscFunctionReturn(0);
72   if (symmetric) {
73     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
74   } else if (oshift == 1) {
75     /* temporarily add 1 to i and j indices */
76     int nz = a->i[n] + 1;
77     for ( i=0; i<nz; i++ ) a->j[i]++;
78     for ( i=0; i<n+1; i++ ) a->i[i]++;
79     *ia = a->i; *ja = a->j;
80   } else {
81     *ia = a->i; *ja = a->j;
82   }
83 
84   PetscFunctionReturn(0);
85 }
86 
87 #undef __FUNC__
88 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
89 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
90                                 PetscTruth *done)
91 {
92   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
93   int         i,n = a->mbs;
94 
95   PetscFunctionBegin;
96   if (!ia) PetscFunctionReturn(0);
97   if (symmetric) {
98     PetscFree(*ia);
99     PetscFree(*ja);
100   } else if (oshift == 1) {
101     int nz = a->i[n];
102     for ( i=0; i<nz; i++ ) a->j[i]--;
103     for ( i=0; i<n+1; i++ ) a->i[i]--;
104   }
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNC__
109 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
110 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
111 {
112   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
113 
114   PetscFunctionBegin;
115   *bs = baij->bs;
116   PetscFunctionReturn(0);
117 }
118 
119 
120 #undef __FUNC__
121 #define __FUNC__ "MatDestroy_SeqBAIJ"
122 int MatDestroy_SeqBAIJ(Mat A)
123 {
124   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
125   int         ierr;
126 
127   if (--A->refct > 0) PetscFunctionReturn(0);
128 
129   if (A->mapping) {
130     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
131   }
132   if (A->bmapping) {
133     ierr = ISLocalToGlobalMappingDestroy(A->bmapping); CHKERRQ(ierr);
134   }
135   if (A->rmap) {
136     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
137   }
138   if (A->cmap) {
139     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
140   }
141 #if defined(USE_PETSC_LOG)
142   PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
143 #endif
144   PetscFree(a->a);
145   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
146   if (a->diag) PetscFree(a->diag);
147   if (a->ilen) PetscFree(a->ilen);
148   if (a->imax) PetscFree(a->imax);
149   if (a->solve_work) PetscFree(a->solve_work);
150   if (a->mult_work) PetscFree(a->mult_work);
151   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
152   PetscFree(a);
153   PLogObjectDestroy(A);
154   PetscHeaderDestroy(A);
155   PetscFunctionReturn(0);
156 }
157 
158 #undef __FUNC__
159 #define __FUNC__ "MatSetOption_SeqBAIJ"
160 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
161 {
162   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
163 
164   PetscFunctionBegin;
165   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
166   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
167   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
168   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
169   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
170   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew       = -1;
171   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew       = -2;
172   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
173   else if (op == MAT_ROWS_SORTED ||
174            op == MAT_ROWS_UNSORTED ||
175            op == MAT_SYMMETRIC ||
176            op == MAT_STRUCTURALLY_SYMMETRIC ||
177            op == MAT_YES_NEW_DIAGONALS ||
178            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
179            op == MAT_USE_HASH_TABLE) {
180     PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
181   } else if (op == MAT_NO_NEW_DIAGONALS) {
182     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
183   } else {
184     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
185   }
186   PetscFunctionReturn(0);
187 }
188 
189 
190 #undef __FUNC__
191 #define __FUNC__ "MatGetSize_SeqBAIJ"
192 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
193 {
194   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
195 
196   PetscFunctionBegin;
197   if (m) *m = a->m;
198   if (n) *n = a->n;
199   PetscFunctionReturn(0);
200 }
201 
202 #undef __FUNC__
203 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
204 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
205 {
206   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
207 
208   PetscFunctionBegin;
209   *m = 0; *n = a->m;
210   PetscFunctionReturn(0);
211 }
212 
213 #undef __FUNC__
214 #define __FUNC__ "MatGetRow_SeqBAIJ"
215 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
216 {
217   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ *) A->data;
218   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
219   MatScalar    *aa,*aa_i;
220   Scalar       *v_i;
221 
222   PetscFunctionBegin;
223   bs  = a->bs;
224   ai  = a->i;
225   aj  = a->j;
226   aa  = a->a;
227   bs2 = a->bs2;
228 
229   if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range");
230 
231   bn  = row/bs;   /* Block number */
232   bp  = row % bs; /* Block Position */
233   M   = ai[bn+1] - ai[bn];
234   *nz = bs*M;
235 
236   if (v) {
237     *v = 0;
238     if (*nz) {
239       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
240       for ( i=0; i<M; i++ ) { /* for each block in the block row */
241         v_i  = *v + i*bs;
242         aa_i = aa + bs2*(ai[bn] + i);
243         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
244       }
245     }
246   }
247 
248   if (idx) {
249     *idx = 0;
250     if (*nz) {
251       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
252       for ( i=0; i<M; i++ ) { /* for each block in the block row */
253         idx_i = *idx + i*bs;
254         itmp  = bs*aj[ai[bn] + i];
255         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
256       }
257     }
258   }
259   PetscFunctionReturn(0);
260 }
261 
262 #undef __FUNC__
263 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
264 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
265 {
266   PetscFunctionBegin;
267   if (idx) {if (*idx) PetscFree(*idx);}
268   if (v)   {if (*v)   PetscFree(*v);}
269   PetscFunctionReturn(0);
270 }
271 
272 #undef __FUNC__
273 #define __FUNC__ "MatTranspose_SeqBAIJ"
274 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
275 {
276   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
277   Mat         C;
278   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
279   int         *rows,*cols,bs2=a->bs2;
280   MatScalar   *array = a->a;
281 
282   PetscFunctionBegin;
283   if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place");
284   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
285   PetscMemzero(col,(1+nbs)*sizeof(int));
286 
287   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
288   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
289   PetscFree(col);
290   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
291   cols = rows + bs;
292   for ( i=0; i<mbs; i++ ) {
293     cols[0] = i*bs;
294     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
295     len = ai[i+1] - ai[i];
296     for ( j=0; j<len; j++ ) {
297       rows[0] = (*aj++)*bs;
298       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
299       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
300       array += bs2;
301     }
302   }
303   PetscFree(rows);
304 
305   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
306   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
307 
308   if (B != PETSC_NULL) {
309     *B = C;
310   } else {
311     PetscOps *Abops;
312     MatOps   Aops;
313 
314     /* This isn't really an in-place transpose */
315     PetscFree(a->a);
316     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
317     if (a->diag) PetscFree(a->diag);
318     if (a->ilen) PetscFree(a->ilen);
319     if (a->imax) PetscFree(a->imax);
320     if (a->solve_work) PetscFree(a->solve_work);
321     PetscFree(a);
322 
323 
324     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
325     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
326 
327     /*
328        This is horrible, horrible code. We need to keep the
329       A pointers for the bops and ops but copy everything
330       else from C.
331     */
332     Abops    = A->bops;
333     Aops     = A->ops;
334     PetscMemcpy(A,C,sizeof(struct _p_Mat));
335     A->bops  = Abops;
336     A->ops   = Aops;
337     A->qlist = 0;
338 
339     PetscHeaderDestroy(C);
340   }
341   PetscFunctionReturn(0);
342 }
343 
344 #undef __FUNC__
345 #define __FUNC__ "MatView_SeqBAIJ_Binary"
346 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
347 {
348   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
349   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
350   Scalar      *aa;
351   FILE        *file;
352 
353   PetscFunctionBegin;
354   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
355   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
356   col_lens[0] = MAT_COOKIE;
357 
358   col_lens[1] = a->m;
359   col_lens[2] = a->n;
360   col_lens[3] = a->nz*bs2;
361 
362   /* store lengths of each row and write (including header) to file */
363   count = 0;
364   for ( i=0; i<a->mbs; i++ ) {
365     for ( j=0; j<bs; j++ ) {
366       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
367     }
368   }
369   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1); CHKERRQ(ierr);
370   PetscFree(col_lens);
371 
372   /* store column indices (zero start index) */
373   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj);
374   count = 0;
375   for ( i=0; i<a->mbs; i++ ) {
376     for ( j=0; j<bs; j++ ) {
377       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
378         for ( l=0; l<bs; l++ ) {
379           jj[count++] = bs*a->j[k] + l;
380         }
381       }
382     }
383   }
384   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0); CHKERRQ(ierr);
385   PetscFree(jj);
386 
387   /* store nonzero values */
388   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa);
389   count = 0;
390   for ( i=0; i<a->mbs; i++ ) {
391     for ( j=0; j<bs; j++ ) {
392       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
393         for ( l=0; l<bs; l++ ) {
394           aa[count++] = a->a[bs2*k + l*bs + j];
395         }
396       }
397     }
398   }
399   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0); CHKERRQ(ierr);
400   PetscFree(aa);
401 
402   ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
403   if (file) {
404     fprintf(file,"-matload_block_size %d\n",a->bs);
405   }
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNC__
410 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
411 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
412 {
413   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
414   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
415   FILE        *fd;
416   char        *outputname;
417 
418   PetscFunctionBegin;
419   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
420   ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr);
421   ierr = ViewerGetFormat(viewer,&format);
422   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
423     ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);
424   } else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
425     SETERRQ(PETSC_ERR_SUP,0,"Socket format not supported");
426   } else if (format == VIEWER_FORMAT_ASCII_COMMON) {
427     for ( i=0; i<a->mbs; i++ ) {
428       for ( j=0; j<bs; j++ ) {
429         fprintf(fd,"row %d:",i*bs+j);
430         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
431           for ( l=0; l<bs; l++ ) {
432 #if defined(USE_PETSC_COMPLEX)
433             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
434               fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
435                       PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));
436             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
437               fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
438                       PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));
439             } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
440               fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));
441             }
442 #else
443             if (a->a[bs2*k + l*bs + j] != 0.0) {
444               fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
445             }
446 #endif
447           }
448         }
449         fprintf(fd,"\n");
450       }
451     }
452   } else {
453     for ( i=0; i<a->mbs; i++ ) {
454       for ( j=0; j<bs; j++ ) {
455         fprintf(fd,"row %d:",i*bs+j);
456         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
457           for ( l=0; l<bs; l++ ) {
458 #if defined(USE_PETSC_COMPLEX)
459             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) {
460               fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
461                 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));
462             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) {
463               fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
464                 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));
465             } else {
466               fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));
467             }
468 #else
469             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
470 #endif
471           }
472         }
473         fprintf(fd,"\n");
474       }
475     }
476   }
477   fflush(fd);
478   PetscFunctionReturn(0);
479 }
480 
481 #undef __FUNC__
482 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom"
483 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa)
484 {
485   Mat          A = (Mat) Aa;
486   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
487   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
488   double       xl,yl,xr,yr,x_l,x_r,y_l,y_r;
489   MatScalar    *aa;
490   MPI_Comm     comm;
491   Viewer       viewer;
492 
493   PetscFunctionBegin;
494   /*
495       This is nasty. If this is called from an originally parallel matrix
496    then all processes call this, but only the first has the matrix so the
497    rest should return immediately.
498   */
499   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
500   MPI_Comm_rank(comm,&rank);
501   if (rank) PetscFunctionReturn(0);
502 
503   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr);
504 
505   ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr); CHKERRQ(ierr);
506 
507   /* loop over matrix elements drawing boxes */
508   color = DRAW_BLUE;
509   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
510     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
511       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
512       x_l = a->j[j]*bs; x_r = x_l + 1.0;
513       aa = a->a + j*bs2;
514       for ( k=0; k<bs; k++ ) {
515         for ( l=0; l<bs; l++ ) {
516           if (PetscReal(*aa++) >=  0.) continue;
517           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
518         }
519       }
520     }
521   }
522   color = DRAW_CYAN;
523   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
524     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
525       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
526       x_l = a->j[j]*bs; x_r = x_l + 1.0;
527       aa = a->a + j*bs2;
528       for ( k=0; k<bs; k++ ) {
529         for ( l=0; l<bs; l++ ) {
530           if (PetscReal(*aa++) != 0.) continue;
531           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
532         }
533       }
534     }
535   }
536 
537   color = DRAW_RED;
538   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
539     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
540       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
541       x_l = a->j[j]*bs; x_r = x_l + 1.0;
542       aa = a->a + j*bs2;
543       for ( k=0; k<bs; k++ ) {
544         for ( l=0; l<bs; l++ ) {
545           if (PetscReal(*aa++) <= 0.) continue;
546           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
547         }
548       }
549     }
550   }
551   PetscFunctionReturn(0);
552 }
553 
554 #undef __FUNC__
555 #define __FUNC__ "MatView_SeqBAIJ_Draw"
556 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
557 {
558   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
559   int          ierr;
560   double       xl,yl,xr,yr,w,h;
561   Draw         draw;
562   PetscTruth   isnull;
563 
564   PetscFunctionBegin;
565 
566   ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
567   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
568 
569   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
570   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
571   xr += w;    yr += h;  xl = -w;     yl = -h;
572   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
573   ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A); CHKERRQ(ierr);
574   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
575   PetscFunctionReturn(0);
576 }
577 
578 #undef __FUNC__
579 #define __FUNC__ "MatView_SeqBAIJ"
580 int MatView_SeqBAIJ(Mat A,Viewer viewer)
581 {
582   ViewerType  vtype;
583   int         ierr;
584 
585   PetscFunctionBegin;
586   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
587   if (PetscTypeCompare(vtype,SOCKET_VIEWER)) {
588     SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported");
589   } else if (PetscTypeCompare(vtype,ASCII_VIEWER)){
590     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
591   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
592     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
593   } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
594     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
595   } else {
596     SETERRQ(1,1,"Viewer type not supported by PETSc object");
597   }
598   PetscFunctionReturn(0);
599 }
600 
601 
602 #undef __FUNC__
603 #define __FUNC__ "MatGetValues_SeqBAIJ"
604 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
605 {
606   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
607   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
608   int        *ai = a->i, *ailen = a->ilen;
609   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
610   MatScalar  *ap, *aa = a->a, zero = 0.0;
611 
612   PetscFunctionBegin;
613   for ( k=0; k<m; k++ ) { /* loop over rows */
614     row  = im[k]; brow = row/bs;
615     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
616     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
617     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
618     nrow = ailen[brow];
619     for ( l=0; l<n; l++ ) { /* loop over columns */
620       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
621       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
622       col  = in[l] ;
623       bcol = col/bs;
624       cidx = col%bs;
625       ridx = row%bs;
626       high = nrow;
627       low  = 0; /* assume unsorted */
628       while (high-low > 5) {
629         t = (low+high)/2;
630         if (rp[t] > bcol) high = t;
631         else             low  = t;
632       }
633       for ( i=low; i<high; i++ ) {
634         if (rp[i] > bcol) break;
635         if (rp[i] == bcol) {
636           *v++ = ap[bs2*i+bs*cidx+ridx];
637           goto finished;
638         }
639       }
640       *v++ = zero;
641       finished:;
642     }
643   }
644   PetscFunctionReturn(0);
645 }
646 
647 
648 #undef __FUNC__
649 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
650 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
651 {
652   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
653   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
654   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented;
655   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval;
656   Scalar      *value = v;
657   MatScalar   *ap,*aa=a->a,*bap;
658 
659   PetscFunctionBegin;
660   if (roworiented) {
661     stepval = (n-1)*bs;
662   } else {
663     stepval = (m-1)*bs;
664   }
665   for ( k=0; k<m; k++ ) { /* loop over added rows */
666     row  = im[k];
667     if (row < 0) continue;
668 #if defined(USE_PETSC_BOPT_g)
669     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
670 #endif
671     rp   = aj + ai[row];
672     ap   = aa + bs2*ai[row];
673     rmax = imax[row];
674     nrow = ailen[row];
675     low  = 0;
676     for ( l=0; l<n; l++ ) { /* loop over added columns */
677       if (in[l] < 0) continue;
678 #if defined(USE_PETSC_BOPT_g)
679       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
680 #endif
681       col = in[l];
682       if (roworiented) {
683         value = v + k*(stepval+bs)*bs + l*bs;
684       } else {
685         value = v + l*(stepval+bs)*bs + k*bs;
686       }
687       if (!sorted) low = 0; high = nrow;
688       while (high-low > 7) {
689         t = (low+high)/2;
690         if (rp[t] > col) high = t;
691         else             low  = t;
692       }
693       for ( i=low; i<high; i++ ) {
694         if (rp[i] > col) break;
695         if (rp[i] == col) {
696           bap  = ap +  bs2*i;
697           if (roworiented) {
698             if (is == ADD_VALUES) {
699               for ( ii=0; ii<bs; ii++,value+=stepval ) {
700                 for (jj=ii; jj<bs2; jj+=bs ) {
701                   bap[jj] += *value++;
702                 }
703               }
704             } else {
705               for ( ii=0; ii<bs; ii++,value+=stepval ) {
706                 for (jj=ii; jj<bs2; jj+=bs ) {
707                   bap[jj] = *value++;
708                 }
709               }
710             }
711           } else {
712             if (is == ADD_VALUES) {
713               for ( ii=0; ii<bs; ii++,value+=stepval ) {
714                 for (jj=0; jj<bs; jj++ ) {
715                   *bap++ += *value++;
716                 }
717               }
718             } else {
719               for ( ii=0; ii<bs; ii++,value+=stepval ) {
720                 for (jj=0; jj<bs; jj++ ) {
721                   *bap++  = *value++;
722                 }
723               }
724             }
725           }
726           goto noinsert2;
727         }
728       }
729       if (nonew == 1) goto noinsert2;
730       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
731       if (nrow >= rmax) {
732         /* there is no extra room in row, therefore enlarge */
733         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
734         MatScalar *new_a;
735 
736         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
737 
738         /* malloc new storage space */
739         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
740         new_a   = (MatScalar *) PetscMalloc( len ); CHKPTRQ(new_a);
741         new_j   = (int *) (new_a + bs2*new_nz);
742         new_i   = new_j + new_nz;
743 
744         /* copy over old data into new slots */
745         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
746         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
747         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
748         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
749         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
750         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
751         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
752         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
753         /* free up old matrix storage */
754         PetscFree(a->a);
755         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
756         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
757         a->singlemalloc = 1;
758 
759         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
760         rmax = imax[row] = imax[row] + CHUNKSIZE;
761         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
762         a->maxnz += bs2*CHUNKSIZE;
763         a->reallocs++;
764         a->nz++;
765       }
766       N = nrow++ - 1;
767       /* shift up all the later entries in this row */
768       for ( ii=N; ii>=i; ii-- ) {
769         rp[ii+1] = rp[ii];
770         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
771       }
772       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
773       rp[i] = col;
774       bap   = ap +  bs2*i;
775       if (roworiented) {
776         for ( ii=0; ii<bs; ii++,value+=stepval) {
777           for (jj=ii; jj<bs2; jj+=bs ) {
778             bap[jj] = *value++;
779           }
780         }
781       } else {
782         for ( ii=0; ii<bs; ii++,value+=stepval ) {
783           for (jj=0; jj<bs; jj++ ) {
784             *bap++  = *value++;
785           }
786         }
787       }
788       noinsert2:;
789       low = i;
790     }
791     ailen[row] = nrow;
792   }
793   PetscFunctionReturn(0);
794 }
795 
796 
797 #undef __FUNC__
798 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
799 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
800 {
801   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
802   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
803   int        m = a->m,*ip, N, *ailen = a->ilen;
804   int        mbs = a->mbs, bs2 = a->bs2,rmax = 0;
805   MatScalar  *aa = a->a, *ap;
806 
807   PetscFunctionBegin;
808   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
809 
810   if (m) rmax = ailen[0];
811   for ( i=1; i<mbs; i++ ) {
812     /* move each row back by the amount of empty slots (fshift) before it*/
813     fshift += imax[i-1] - ailen[i-1];
814     rmax   = PetscMax(rmax,ailen[i]);
815     if (fshift) {
816       ip = aj + ai[i]; ap = aa + bs2*ai[i];
817       N = ailen[i];
818       for ( j=0; j<N; j++ ) {
819         ip[j-fshift] = ip[j];
820         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
821       }
822     }
823     ai[i] = ai[i-1] + ailen[i-1];
824   }
825   if (mbs) {
826     fshift += imax[mbs-1] - ailen[mbs-1];
827     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
828   }
829   /* reset ilen and imax for each row */
830   for ( i=0; i<mbs; i++ ) {
831     ailen[i] = imax[i] = ai[i+1] - ai[i];
832   }
833   a->nz = ai[mbs];
834 
835   /* diagonals may have moved, so kill the diagonal pointers */
836   if (fshift && a->diag) {
837     PetscFree(a->diag);
838     PLogObjectMemory(A,-(m+1)*sizeof(int));
839     a->diag = 0;
840   }
841   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
842            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
843   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
844            a->reallocs);
845   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
846   a->reallocs          = 0;
847   A->info.nz_unneeded  = (double)fshift*bs2;
848 
849   PetscFunctionReturn(0);
850 }
851 
852 
853 
854 /*
855    This function returns an array of flags which indicate the locations of contiguous
856    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
857    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
858    Assume: sizes should be long enough to hold all the values.
859 */
860 #undef __FUNC__
861 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks"
862 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
863 {
864   int i,j,k,row;
865   PetscTruth flg;
866 
867   /*   PetscFunctionBegin;*/
868   for ( i=0,j=0; i<n; j++ ) {
869     row = idx[i];
870     if (row%bs!=0) { /* Not the begining of a block */
871       sizes[j] = 1;
872       i++;
873     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
874       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
875       i++;
876     } else { /* Begining of the block, so check if the complete block exists */
877       flg = PETSC_TRUE;
878       for ( k=1; k<bs; k++ ) {
879         if (row+k != idx[i+k]) { /* break in the block */
880           flg = PETSC_FALSE;
881           break;
882         }
883       }
884       if (flg == PETSC_TRUE) { /* No break in the bs */
885         sizes[j] = bs;
886         i+= bs;
887       } else {
888         sizes[j] = 1;
889         i++;
890       }
891     }
892   }
893   *bs_max = j;
894   PetscFunctionReturn(0);
895 }
896 
897 #undef __FUNC__
898 #define __FUNC__ "MatZeroRows_SeqBAIJ"
899 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
900 {
901   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
902   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
903   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
904   Scalar      zero = 0.0;
905   MatScalar   *aa;
906 
907   PetscFunctionBegin;
908   /* Make a copy of the IS and  sort it */
909   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
910   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
911 
912   /* allocate memory for rows,sizes */
913   rows  = (int*)PetscMalloc((3*is_n+1)*sizeof(int)); CHKPTRQ(rows);
914   sizes = rows + is_n;
915 
916   /* initialize copy IS valurs to rows, and sort them */
917   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
918   ierr = PetscSortInt(is_n,rows); CHKERRQ(ierr);
919   ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max); CHKERRQ(ierr);
920   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
921 
922   for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) {
923     row   = rows[j];
924     if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row);
925     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
926     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
927     if (sizes[i] == bs) {
928       if (diag) {
929         if (baij->ilen[row/bs] > 0) {
930           baij->ilen[row/bs] = 1;
931           baij->j[baij->i[row/bs]] = row/bs;
932           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar)); CHKERRQ(ierr);
933         }
934         /* Now insert all the diagoanl values for this bs */
935         for ( k=0; k<bs; k++ ) {
936           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
937         }
938       } else { /* (!diag) */
939         baij->ilen[row/bs] = 0;
940       } /* end (!diag) */
941     } else { /* (sizes[i] != bs) */
942 #if defined (USE_PETSC_DEBUG)
943       if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1");
944 #endif
945       for ( k=0; k<count; k++ ) {
946         aa[0] = zero;
947         aa+=bs;
948       }
949       if (diag) {
950         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
951       }
952     }
953   }
954 
955   PetscFree(rows);
956   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
957   PetscFunctionReturn(0);
958 }
959 
960 #undef __FUNC__
961 #define __FUNC__ "MatSetValues_SeqBAIJ"
962 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
963 {
964   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
965   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
966   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
967   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
968   int         ridx,cidx,bs2=a->bs2;
969   MatScalar   *ap,value,*aa=a->a,*bap;
970 
971   PetscFunctionBegin;
972   for ( k=0; k<m; k++ ) { /* loop over added rows */
973     row  = im[k]; brow = row/bs;
974     if (row < 0) continue;
975 #if defined(USE_PETSC_BOPT_g)
976     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
977 #endif
978     rp   = aj + ai[brow];
979     ap   = aa + bs2*ai[brow];
980     rmax = imax[brow];
981     nrow = ailen[brow];
982     low  = 0;
983     for ( l=0; l<n; l++ ) { /* loop over added columns */
984       if (in[l] < 0) continue;
985 #if defined(USE_PETSC_BOPT_g)
986       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
987 #endif
988       col = in[l]; bcol = col/bs;
989       ridx = row % bs; cidx = col % bs;
990       if (roworiented) {
991         value = v[l + k*n];
992       } else {
993         value = v[k + l*m];
994       }
995       if (!sorted) low = 0; high = nrow;
996       while (high-low > 7) {
997         t = (low+high)/2;
998         if (rp[t] > bcol) high = t;
999         else              low  = t;
1000       }
1001       for ( i=low; i<high; i++ ) {
1002         if (rp[i] > bcol) break;
1003         if (rp[i] == bcol) {
1004           bap  = ap +  bs2*i + bs*cidx + ridx;
1005           if (is == ADD_VALUES) *bap += value;
1006           else                  *bap  = value;
1007           goto noinsert1;
1008         }
1009       }
1010       if (nonew == 1) goto noinsert1;
1011       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1012       if (nrow >= rmax) {
1013         /* there is no extra room in row, therefore enlarge */
1014         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1015         MatScalar *new_a;
1016 
1017         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1018 
1019         /* Malloc new storage space */
1020         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1021         new_a   = (MatScalar *) PetscMalloc( len ); CHKPTRQ(new_a);
1022         new_j   = (int *) (new_a + bs2*new_nz);
1023         new_i   = new_j + new_nz;
1024 
1025         /* copy over old data into new slots */
1026         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
1027         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1028         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
1029         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1030         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
1031                                                            len*sizeof(int));
1032         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
1033         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1034         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
1035                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
1036         /* free up old matrix storage */
1037         PetscFree(a->a);
1038         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
1039         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1040         a->singlemalloc = 1;
1041 
1042         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1043         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1044         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1045         a->maxnz += bs2*CHUNKSIZE;
1046         a->reallocs++;
1047         a->nz++;
1048       }
1049       N = nrow++ - 1;
1050       /* shift up all the later entries in this row */
1051       for ( ii=N; ii>=i; ii-- ) {
1052         rp[ii+1] = rp[ii];
1053         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1054       }
1055       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1056       rp[i]                      = bcol;
1057       ap[bs2*i + bs*cidx + ridx] = value;
1058       noinsert1:;
1059       low = i;
1060     }
1061     ailen[brow] = nrow;
1062   }
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1067 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1068 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1069 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1070 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1071 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec);
1072 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1073 extern int MatScale_SeqBAIJ(Scalar*,Mat);
1074 extern int MatNorm_SeqBAIJ(Mat,NormType,double *);
1075 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*);
1076 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1077 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1078 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1079 extern int MatZeroEntries_SeqBAIJ(Mat);
1080 
1081 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1082 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1083 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1084 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1085 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1086 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1087 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
1088 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1089 
1090 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1091 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1092 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1093 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1094 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1095 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1096 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*);
1097 
1098 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1099 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1100 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1101 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1102 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1103 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec);
1104 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1105 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1106 
1107 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1108 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1109 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1110 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1111 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1112 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
1113 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1114 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1115 
1116 #undef __FUNC__
1117 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1118 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1119 {
1120   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1121   Mat         outA;
1122   int         ierr;
1123   PetscTruth  row_identity, col_identity;
1124 
1125   PetscFunctionBegin;
1126   if (info && info->fill != 0) SETERRQ(PETSC_ERR_SUP,0,"Only fill=0 supported");
1127   ierr = ISIdentity(row,&row_identity); CHKERRQ(ierr);
1128   ierr = ISIdentity(col,&col_identity); CHKERRQ(ierr);
1129   if (!row_identity || !col_identity) {
1130     SETERRQ(1,1,"Row and column permutations must be identity");
1131   }
1132 
1133   outA          = inA;
1134   inA->factor   = FACTOR_LU;
1135   a->row        = row;
1136   a->col        = col;
1137 
1138   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1139   ierr = ISInvertPermutation(col,&(a->icol)); CHKERRQ(ierr);
1140   PLogObjectParent(inA,a->icol);
1141 
1142   if (!a->solve_work) {
1143     a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1144     PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1145   }
1146 
1147   if (!a->diag) {
1148     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1149   }
1150   /*
1151       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1152       for ILU(0) factorization with natural ordering
1153   */
1154   switch (a->bs) {
1155     case 2:
1156     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1157     inA->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1158     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1159     break;
1160   case 3:
1161     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1162     inA->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1163     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1164     break;
1165   case 4:
1166     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1167     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1168     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1169     break;
1170   case 5:
1171     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1172     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1173     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1174     break;
1175   case 6:
1176     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1177     inA->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1178     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1179     break;
1180   case 7:
1181     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1182     inA->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1183     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1184     break;
1185   }
1186 
1187   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1188 
1189 
1190   PetscFunctionReturn(0);
1191 }
1192 #undef __FUNC__
1193 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1194 int MatPrintHelp_SeqBAIJ(Mat A)
1195 {
1196   static int called = 0;
1197   MPI_Comm   comm = A->comm;
1198 
1199   PetscFunctionBegin;
1200   if (called) {PetscFunctionReturn(0);} else called = 1;
1201   (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1202   (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 EXTERN_C_BEGIN
1207 #undef __FUNC__
1208 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1209 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1210 {
1211   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1212   int         i,nz,n;
1213 
1214   PetscFunctionBegin;
1215   nz = baij->maxnz;
1216   n  = baij->n;
1217   for (i=0; i<nz; i++) {
1218     baij->j[i] = indices[i];
1219   }
1220   baij->nz = nz;
1221   for ( i=0; i<n; i++ ) {
1222     baij->ilen[i] = baij->imax[i];
1223   }
1224 
1225   PetscFunctionReturn(0);
1226 }
1227 EXTERN_C_END
1228 
1229 #undef __FUNC__
1230 #define __FUNC__ "MatSeqBAIJSetColumnIndices"
1231 /*@
1232     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1233        in the matrix.
1234 
1235   Input Parameters:
1236 +  mat - the SeqBAIJ matrix
1237 -  indices - the column indices
1238 
1239   Level: advanced
1240 
1241   Notes:
1242     This can be called if you have precomputed the nonzero structure of the
1243   matrix and want to provide it to the matrix object to improve the performance
1244   of the MatSetValues() operation.
1245 
1246     You MUST have set the correct numbers of nonzeros per row in the call to
1247   MatCreateSeqBAIJ().
1248 
1249     MUST be called before any calls to MatSetValues();
1250 
1251 @*/
1252 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1253 {
1254   int ierr,(*f)(Mat,int *);
1255 
1256   PetscFunctionBegin;
1257   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1258   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);
1259          CHKERRQ(ierr);
1260   if (f) {
1261     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1262   } else {
1263     SETERRQ(1,1,"Wrong type of matrix to set column indices");
1264   }
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 /* -------------------------------------------------------------------*/
1269 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1270        MatGetRow_SeqBAIJ,
1271        MatRestoreRow_SeqBAIJ,
1272        MatMult_SeqBAIJ_N,
1273        MatMultAdd_SeqBAIJ_N,
1274        MatMultTrans_SeqBAIJ,
1275        MatMultTransAdd_SeqBAIJ,
1276        MatSolve_SeqBAIJ_N,
1277        0,
1278        0,
1279        0,
1280        MatLUFactor_SeqBAIJ,
1281        0,
1282        0,
1283        MatTranspose_SeqBAIJ,
1284        MatGetInfo_SeqBAIJ,
1285        MatEqual_SeqBAIJ,
1286        MatGetDiagonal_SeqBAIJ,
1287        MatDiagonalScale_SeqBAIJ,
1288        MatNorm_SeqBAIJ,
1289        0,
1290        MatAssemblyEnd_SeqBAIJ,
1291        0,
1292        MatSetOption_SeqBAIJ,
1293        MatZeroEntries_SeqBAIJ,
1294        MatZeroRows_SeqBAIJ,
1295        MatLUFactorSymbolic_SeqBAIJ,
1296        MatLUFactorNumeric_SeqBAIJ_N,
1297        0,
1298        0,
1299        MatGetSize_SeqBAIJ,
1300        MatGetSize_SeqBAIJ,
1301        MatGetOwnershipRange_SeqBAIJ,
1302        MatILUFactorSymbolic_SeqBAIJ,
1303        0,
1304        0,
1305        0,
1306        MatDuplicate_SeqBAIJ,
1307        0,
1308        0,
1309        MatILUFactor_SeqBAIJ,
1310        0,
1311        0,
1312        MatGetSubMatrices_SeqBAIJ,
1313        MatIncreaseOverlap_SeqBAIJ,
1314        MatGetValues_SeqBAIJ,
1315        0,
1316        MatPrintHelp_SeqBAIJ,
1317        MatScale_SeqBAIJ,
1318        0,
1319        0,
1320        0,
1321        MatGetBlockSize_SeqBAIJ,
1322        MatGetRowIJ_SeqBAIJ,
1323        MatRestoreRowIJ_SeqBAIJ,
1324        0,
1325        0,
1326        0,
1327        0,
1328        0,
1329        0,
1330        MatSetValuesBlocked_SeqBAIJ,
1331        MatGetSubMatrix_SeqBAIJ,
1332        0,
1333        0,
1334        MatGetMaps_Petsc};
1335 
1336 #undef __FUNC__
1337 #define __FUNC__ "MatCreateSeqBAIJ"
1338 /*@C
1339    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1340    compressed row) format.  For good matrix assembly performance the
1341    user should preallocate the matrix storage by setting the parameter nz
1342    (or the array nzz).  By setting these parameters accurately, performance
1343    during matrix assembly can be increased by more than a factor of 50.
1344 
1345    Collective on MPI_Comm
1346 
1347    Input Parameters:
1348 +  comm - MPI communicator, set to PETSC_COMM_SELF
1349 .  bs - size of block
1350 .  m - number of rows
1351 .  n - number of columns
1352 .  nz - number of block nonzeros per block row (same for all rows)
1353 -  nzz - array containing the number of block nonzeros in the various block rows
1354          (possibly different for each block row) or PETSC_NULL
1355 
1356    Output Parameter:
1357 .  A - the matrix
1358 
1359    Options Database Keys:
1360 .   -mat_no_unroll - uses code that does not unroll the loops in the
1361                      block calculations (much slower)
1362 .    -mat_block_size - size of the blocks to use
1363 
1364    Level: intermediate
1365 
1366    Notes:
1367    The block AIJ format is fully compatible with standard Fortran 77
1368    storage.  That is, the stored row and column indices can begin at
1369    either one (as in Fortran) or zero.  See the users' manual for details.
1370 
1371    Specify the preallocated storage with either nz or nnz (not both).
1372    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1373    allocation.  For additional details, see the users manual chapter on
1374    matrices.
1375 
1376 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1377 @*/
1378 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1379 {
1380   Mat         B;
1381   Mat_SeqBAIJ *b;
1382   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
1383 
1384   PetscFunctionBegin;
1385   MPI_Comm_size(comm,&size);
1386   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1");
1387 
1388   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1389 
1390   if (mbs*bs!=m || nbs*bs!=n) {
1391     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize");
1392   }
1393 
1394   *A = 0;
1395   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView);
1396   PLogObjectCreate(B);
1397   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1398   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1399   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1400   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1401   if (!flg) {
1402     switch (bs) {
1403     case 1:
1404       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1405       B->ops->solve           = MatSolve_SeqBAIJ_1;
1406       B->ops->mult            = MatMult_SeqBAIJ_1;
1407       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1408       break;
1409     case 2:
1410       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1411       B->ops->solve           = MatSolve_SeqBAIJ_2;
1412       B->ops->mult            = MatMult_SeqBAIJ_2;
1413       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1414       break;
1415     case 3:
1416       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1417       B->ops->solve           = MatSolve_SeqBAIJ_3;
1418       B->ops->mult            = MatMult_SeqBAIJ_3;
1419       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1420       break;
1421     case 4:
1422       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1423       B->ops->solve           = MatSolve_SeqBAIJ_4;
1424       B->ops->mult            = MatMult_SeqBAIJ_4;
1425       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1426       break;
1427     case 5:
1428       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1429       B->ops->solve           = MatSolve_SeqBAIJ_5;
1430       B->ops->mult            = MatMult_SeqBAIJ_5;
1431       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1432       break;
1433     case 6:
1434       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1435       B->ops->solve           = MatSolve_SeqBAIJ_6;
1436       B->ops->mult            = MatMult_SeqBAIJ_6;
1437       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1438       break;
1439     case 7:
1440       B->ops->mult            = MatMult_SeqBAIJ_7;
1441       B->ops->solve           = MatSolve_SeqBAIJ_7;
1442       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1443       break;
1444     }
1445   }
1446   B->ops->destroy     = MatDestroy_SeqBAIJ;
1447   B->ops->view        = MatView_SeqBAIJ;
1448   B->factor           = 0;
1449   B->lupivotthreshold = 1.0;
1450   B->mapping          = 0;
1451   b->row              = 0;
1452   b->col              = 0;
1453   b->icol             = 0;
1454   b->reallocs         = 0;
1455 
1456   b->m       = m; B->m = m; B->M = m;
1457   b->n       = n; B->n = n; B->N = n;
1458 
1459   ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr);
1460   ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr);
1461 
1462   b->mbs     = mbs;
1463   b->nbs     = nbs;
1464   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1465   if (nnz == PETSC_NULL) {
1466     if (nz == PETSC_DEFAULT) nz = 5;
1467     else if (nz <= 0)        nz = 1;
1468     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1469     nz = nz*mbs;
1470   } else {
1471     nz = 0;
1472     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1473   }
1474 
1475   /* allocate the matrix space */
1476   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int);
1477   b->a  = (MatScalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1478   PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
1479   b->j  = (int *) (b->a + nz*bs2);
1480   PetscMemzero(b->j,nz*sizeof(int));
1481   b->i  = b->j + nz;
1482   b->singlemalloc = 1;
1483 
1484   b->i[0] = 0;
1485   for (i=1; i<mbs+1; i++) {
1486     b->i[i] = b->i[i-1] + b->imax[i-1];
1487   }
1488 
1489   /* b->ilen will count nonzeros in each block row so far. */
1490   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1491   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1492   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1493 
1494   b->bs               = bs;
1495   b->bs2              = bs2;
1496   b->mbs              = mbs;
1497   b->nz               = 0;
1498   b->maxnz            = nz*bs2;
1499   b->sorted           = 0;
1500   b->roworiented      = 1;
1501   b->nonew            = 0;
1502   b->diag             = 0;
1503   b->solve_work       = 0;
1504   b->mult_work        = 0;
1505   b->spptr            = 0;
1506   B->info.nz_unneeded = (double)b->maxnz;
1507 
1508   *A = B;
1509   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1510   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1511 
1512   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1513                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1514                                      (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 #undef __FUNC__
1519 #define __FUNC__ "MatDuplicate_SeqBAIJ"
1520 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1521 {
1522   Mat         C;
1523   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1524   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1525 
1526   PetscFunctionBegin;
1527   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix");
1528 
1529   *B = 0;
1530   PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView);
1531   PLogObjectCreate(C);
1532   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1533   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
1534   C->ops->destroy    = MatDestroy_SeqBAIJ;
1535   C->ops->view       = MatView_SeqBAIJ;
1536   C->factor          = A->factor;
1537   c->row             = 0;
1538   c->col             = 0;
1539   c->icol            = 0;
1540   C->assembled       = PETSC_TRUE;
1541 
1542   c->m = C->m   = a->m;
1543   c->n = C->n   = a->n;
1544   C->M          = a->m;
1545   C->N          = a->n;
1546 
1547   c->bs         = a->bs;
1548   c->bs2        = a->bs2;
1549   c->mbs        = a->mbs;
1550   c->nbs        = a->nbs;
1551 
1552   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1553   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1554   for ( i=0; i<mbs; i++ ) {
1555     c->imax[i] = a->imax[i];
1556     c->ilen[i] = a->ilen[i];
1557   }
1558 
1559   /* allocate the matrix space */
1560   c->singlemalloc = 1;
1561   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1562   c->a  = (MatScalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1563   c->j  = (int *) (c->a + nz*bs2);
1564   c->i  = c->j + nz;
1565   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1566   if (mbs > 0) {
1567     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1568     if (cpvalues == MAT_COPY_VALUES) {
1569       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
1570     } else {
1571       PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
1572     }
1573   }
1574 
1575   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1576   c->sorted      = a->sorted;
1577   c->roworiented = a->roworiented;
1578   c->nonew       = a->nonew;
1579 
1580   if (a->diag) {
1581     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1582     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1583     for ( i=0; i<mbs; i++ ) {
1584       c->diag[i] = a->diag[i];
1585     }
1586   } else c->diag        = 0;
1587   c->nz                 = a->nz;
1588   c->maxnz              = a->maxnz;
1589   c->solve_work         = 0;
1590   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1591   c->mult_work          = 0;
1592   *B = C;
1593   ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1594   PetscFunctionReturn(0);
1595 }
1596 
1597 #undef __FUNC__
1598 #define __FUNC__ "MatLoad_SeqBAIJ"
1599 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1600 {
1601   Mat_SeqBAIJ  *a;
1602   Mat          B;
1603   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1604   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1605   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1606   int          *masked, nmask,tmp,bs2,ishift;
1607   Scalar       *aa;
1608   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1609 
1610   PetscFunctionBegin;
1611   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1612   bs2  = bs*bs;
1613 
1614   MPI_Comm_size(comm,&size);
1615   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor");
1616   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1617   ierr = PetscBinaryRead(fd,header,4,PETSC_INT); CHKERRQ(ierr);
1618   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object");
1619   M = header[1]; N = header[2]; nz = header[3];
1620 
1621   if (header[3] < 0) {
1622     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ");
1623   }
1624 
1625   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
1626 
1627   /*
1628      This code adds extra rows to make sure the number of rows is
1629     divisible by the blocksize
1630   */
1631   mbs        = M/bs;
1632   extra_rows = bs - M + bs*(mbs);
1633   if (extra_rows == bs) extra_rows = 0;
1634   else                  mbs++;
1635   if (extra_rows) {
1636     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1637   }
1638 
1639   /* read in row lengths */
1640   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1641   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1642   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1643 
1644   /* read in column indices */
1645   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1646   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT); CHKERRQ(ierr);
1647   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1648 
1649   /* loop over row lengths determining block row lengths */
1650   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1651   PetscMemzero(browlengths,mbs*sizeof(int));
1652   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1653   PetscMemzero(mask,mbs*sizeof(int));
1654   masked = mask + mbs;
1655   rowcount = 0; nzcount = 0;
1656   for ( i=0; i<mbs; i++ ) {
1657     nmask = 0;
1658     for ( j=0; j<bs; j++ ) {
1659       kmax = rowlengths[rowcount];
1660       for ( k=0; k<kmax; k++ ) {
1661         tmp = jj[nzcount++]/bs;
1662         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1663       }
1664       rowcount++;
1665     }
1666     browlengths[i] += nmask;
1667     /* zero out the mask elements we set */
1668     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1669   }
1670 
1671   /* create our matrix */
1672   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1673   B = *A;
1674   a = (Mat_SeqBAIJ *) B->data;
1675 
1676   /* set matrix "i" values */
1677   a->i[0] = 0;
1678   for ( i=1; i<= mbs; i++ ) {
1679     a->i[i]      = a->i[i-1] + browlengths[i-1];
1680     a->ilen[i-1] = browlengths[i-1];
1681   }
1682   a->nz         = 0;
1683   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1684 
1685   /* read in nonzero values */
1686   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1687   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR); CHKERRQ(ierr);
1688   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1689 
1690   /* set "a" and "j" values into matrix */
1691   nzcount = 0; jcount = 0;
1692   for ( i=0; i<mbs; i++ ) {
1693     nzcountb = nzcount;
1694     nmask    = 0;
1695     for ( j=0; j<bs; j++ ) {
1696       kmax = rowlengths[i*bs+j];
1697       for ( k=0; k<kmax; k++ ) {
1698         tmp = jj[nzcount++]/bs;
1699 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1700       }
1701       rowcount++;
1702     }
1703     /* sort the masked values */
1704     PetscSortInt(nmask,masked);
1705 
1706     /* set "j" values into matrix */
1707     maskcount = 1;
1708     for ( j=0; j<nmask; j++ ) {
1709       a->j[jcount++]  = masked[j];
1710       mask[masked[j]] = maskcount++;
1711     }
1712     /* set "a" values into matrix */
1713     ishift = bs2*a->i[i];
1714     for ( j=0; j<bs; j++ ) {
1715       kmax = rowlengths[i*bs+j];
1716       for ( k=0; k<kmax; k++ ) {
1717         tmp       = jj[nzcountb]/bs ;
1718         block     = mask[tmp] - 1;
1719         point     = jj[nzcountb] - bs*tmp;
1720         idx       = ishift + bs2*block + j + bs*point;
1721         a->a[idx] = aa[nzcountb++];
1722       }
1723     }
1724     /* zero out the mask elements we set */
1725     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1726   }
1727   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix");
1728 
1729   PetscFree(rowlengths);
1730   PetscFree(browlengths);
1731   PetscFree(aa);
1732   PetscFree(jj);
1733   PetscFree(mask);
1734 
1735   B->assembled = PETSC_TRUE;
1736 
1737   ierr = MatView_Private(B); CHKERRQ(ierr);
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 
1742 
1743