xref: /petsc/src/mat/impls/baij/seq/baij.c (revision bc5ccf886fc7b029025ea4de8c4047e1db2135de)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: baij.c,v 1.165 1999/02/18 17:13:47 bsmith Exp bsmith $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "sys.h"
10 #include "src/mat/impls/baij/seq/baij.h"
11 #include "src/vec/vecimpl.h"
12 #include "src/inline/spops.h"
13 
14 #define CHUNKSIZE  10
15 
16 /*
17      Checks for missing diagonals
18 */
19 #undef __FUNC__
20 #define __FUNC__ "MatMissingDiag_SeqBAIJ"
21 int MatMissingDiag_SeqBAIJ(Mat A)
22 {
23   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
24   int         *diag = a->diag, *jj = a->j,i;
25 
26   PetscFunctionBegin;
27   for ( i=0; i<a->mbs; i++ ) {
28     if (jj[diag[i]] != i) {
29       SETERRQ1(1,1,"Matrix is missing diagonal number %d",i);
30     }
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 #undef __FUNC__
36 #define __FUNC__ "MatMarkDiag_SeqBAIJ"
37 int MatMarkDiag_SeqBAIJ(Mat A)
38 {
39   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
40   int         i,j, *diag, m = a->mbs;
41 
42   PetscFunctionBegin;
43   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
44   PLogObjectMemory(A,(m+1)*sizeof(int));
45   for ( i=0; i<m; i++ ) {
46     diag[i] = a->i[i+1];
47     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
48       if (a->j[j] == i) {
49         diag[i] = j;
50         break;
51       }
52     }
53   }
54   a->diag = diag;
55   PetscFunctionReturn(0);
56 }
57 
58 
59 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
60 
61 #undef __FUNC__
62 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
63 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
64                             PetscTruth *done)
65 {
66   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
67   int         ierr, n = a->mbs,i;
68 
69   PetscFunctionBegin;
70   *nn = n;
71   if (!ia) PetscFunctionReturn(0);
72   if (symmetric) {
73     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
74   } else if (oshift == 1) {
75     /* temporarily add 1 to i and j indices */
76     int nz = a->i[n] + 1;
77     for ( i=0; i<nz; i++ ) a->j[i]++;
78     for ( i=0; i<n+1; i++ ) a->i[i]++;
79     *ia = a->i; *ja = a->j;
80   } else {
81     *ia = a->i; *ja = a->j;
82   }
83 
84   PetscFunctionReturn(0);
85 }
86 
87 #undef __FUNC__
88 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
89 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
90                                 PetscTruth *done)
91 {
92   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
93   int         i,n = a->mbs;
94 
95   PetscFunctionBegin;
96   if (!ia) PetscFunctionReturn(0);
97   if (symmetric) {
98     PetscFree(*ia);
99     PetscFree(*ja);
100   } else if (oshift == 1) {
101     int nz = a->i[n];
102     for ( i=0; i<nz; i++ ) a->j[i]--;
103     for ( i=0; i<n+1; i++ ) a->i[i]--;
104   }
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNC__
109 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
110 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
111 {
112   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
113 
114   PetscFunctionBegin;
115   *bs = baij->bs;
116   PetscFunctionReturn(0);
117 }
118 
119 
120 #undef __FUNC__
121 #define __FUNC__ "MatDestroy_SeqBAIJ"
122 int MatDestroy_SeqBAIJ(Mat A)
123 {
124   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
125   int         ierr;
126 
127   if (--A->refct > 0) PetscFunctionReturn(0);
128 
129   if (A->mapping) {
130     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
131   }
132   if (A->bmapping) {
133     ierr = ISLocalToGlobalMappingDestroy(A->bmapping); CHKERRQ(ierr);
134   }
135   if (A->rmap) {
136     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
137   }
138   if (A->cmap) {
139     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
140   }
141 #if defined(USE_PETSC_LOG)
142   PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
143 #endif
144   PetscFree(a->a);
145   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
146   if (a->diag) PetscFree(a->diag);
147   if (a->ilen) PetscFree(a->ilen);
148   if (a->imax) PetscFree(a->imax);
149   if (a->solve_work) PetscFree(a->solve_work);
150   if (a->mult_work) PetscFree(a->mult_work);
151   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
152   PetscFree(a);
153   PLogObjectDestroy(A);
154   PetscHeaderDestroy(A);
155   PetscFunctionReturn(0);
156 }
157 
158 #undef __FUNC__
159 #define __FUNC__ "MatSetOption_SeqBAIJ"
160 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
161 {
162   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
163 
164   PetscFunctionBegin;
165   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
166   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
167   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
168   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
169   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
170   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew       = -1;
171   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew       = -2;
172   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
173   else if (op == MAT_ROWS_SORTED ||
174            op == MAT_ROWS_UNSORTED ||
175            op == MAT_SYMMETRIC ||
176            op == MAT_STRUCTURALLY_SYMMETRIC ||
177            op == MAT_YES_NEW_DIAGONALS ||
178            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
179            op == MAT_USE_HASH_TABLE) {
180     PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
181   } else if (op == MAT_NO_NEW_DIAGONALS) {
182     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
183   } else {
184     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
185   }
186   PetscFunctionReturn(0);
187 }
188 
189 
190 #undef __FUNC__
191 #define __FUNC__ "MatGetSize_SeqBAIJ"
192 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
193 {
194   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
195 
196   PetscFunctionBegin;
197   if (m) *m = a->m;
198   if (n) *n = a->n;
199   PetscFunctionReturn(0);
200 }
201 
202 #undef __FUNC__
203 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
204 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
205 {
206   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
207 
208   PetscFunctionBegin;
209   *m = 0; *n = a->m;
210   PetscFunctionReturn(0);
211 }
212 
213 #undef __FUNC__
214 #define __FUNC__ "MatGetRow_SeqBAIJ"
215 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
216 {
217   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ *) A->data;
218   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
219   MatScalar    *aa,*aa_i;
220   Scalar       *v_i;
221 
222   PetscFunctionBegin;
223   bs  = a->bs;
224   ai  = a->i;
225   aj  = a->j;
226   aa  = a->a;
227   bs2 = a->bs2;
228 
229   if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range");
230 
231   bn  = row/bs;   /* Block number */
232   bp  = row % bs; /* Block Position */
233   M   = ai[bn+1] - ai[bn];
234   *nz = bs*M;
235 
236   if (v) {
237     *v = 0;
238     if (*nz) {
239       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
240       for ( i=0; i<M; i++ ) { /* for each block in the block row */
241         v_i  = *v + i*bs;
242         aa_i = aa + bs2*(ai[bn] + i);
243         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
244       }
245     }
246   }
247 
248   if (idx) {
249     *idx = 0;
250     if (*nz) {
251       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
252       for ( i=0; i<M; i++ ) { /* for each block in the block row */
253         idx_i = *idx + i*bs;
254         itmp  = bs*aj[ai[bn] + i];
255         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
256       }
257     }
258   }
259   PetscFunctionReturn(0);
260 }
261 
262 #undef __FUNC__
263 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
264 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
265 {
266   PetscFunctionBegin;
267   if (idx) {if (*idx) PetscFree(*idx);}
268   if (v)   {if (*v)   PetscFree(*v);}
269   PetscFunctionReturn(0);
270 }
271 
272 #undef __FUNC__
273 #define __FUNC__ "MatTranspose_SeqBAIJ"
274 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
275 {
276   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
277   Mat         C;
278   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
279   int         *rows,*cols,bs2=a->bs2;
280   MatScalar   *array = a->a;
281 
282   PetscFunctionBegin;
283   if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place");
284   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
285   PetscMemzero(col,(1+nbs)*sizeof(int));
286 
287   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
288   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
289   PetscFree(col);
290   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
291   cols = rows + bs;
292   for ( i=0; i<mbs; i++ ) {
293     cols[0] = i*bs;
294     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
295     len = ai[i+1] - ai[i];
296     for ( j=0; j<len; j++ ) {
297       rows[0] = (*aj++)*bs;
298       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
299       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
300       array += bs2;
301     }
302   }
303   PetscFree(rows);
304 
305   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
306   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
307 
308   if (B != PETSC_NULL) {
309     *B = C;
310   } else {
311     PetscOps *Abops;
312     MatOps   Aops;
313 
314     /* This isn't really an in-place transpose */
315     PetscFree(a->a);
316     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
317     if (a->diag) PetscFree(a->diag);
318     if (a->ilen) PetscFree(a->ilen);
319     if (a->imax) PetscFree(a->imax);
320     if (a->solve_work) PetscFree(a->solve_work);
321     PetscFree(a);
322 
323 
324     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
325     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
326 
327     /*
328        This is horrible, horrible code. We need to keep the
329       A pointers for the bops and ops but copy everything
330       else from C.
331     */
332     Abops    = A->bops;
333     Aops     = A->ops;
334     PetscMemcpy(A,C,sizeof(struct _p_Mat));
335     A->bops  = Abops;
336     A->ops   = Aops;
337     A->qlist = 0;
338 
339     PetscHeaderDestroy(C);
340   }
341   PetscFunctionReturn(0);
342 }
343 
344 #undef __FUNC__
345 #define __FUNC__ "MatView_SeqBAIJ_Binary"
346 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
347 {
348   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
349   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
350   Scalar      *aa;
351   FILE        *file;
352 
353   PetscFunctionBegin;
354   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
355   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
356   col_lens[0] = MAT_COOKIE;
357 
358   col_lens[1] = a->m;
359   col_lens[2] = a->n;
360   col_lens[3] = a->nz*bs2;
361 
362   /* store lengths of each row and write (including header) to file */
363   count = 0;
364   for ( i=0; i<a->mbs; i++ ) {
365     for ( j=0; j<bs; j++ ) {
366       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
367     }
368   }
369   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1); CHKERRQ(ierr);
370   PetscFree(col_lens);
371 
372   /* store column indices (zero start index) */
373   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj);
374   count = 0;
375   for ( i=0; i<a->mbs; i++ ) {
376     for ( j=0; j<bs; j++ ) {
377       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
378         for ( l=0; l<bs; l++ ) {
379           jj[count++] = bs*a->j[k] + l;
380         }
381       }
382     }
383   }
384   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0); CHKERRQ(ierr);
385   PetscFree(jj);
386 
387   /* store nonzero values */
388   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa);
389   count = 0;
390   for ( i=0; i<a->mbs; i++ ) {
391     for ( j=0; j<bs; j++ ) {
392       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
393         for ( l=0; l<bs; l++ ) {
394           aa[count++] = a->a[bs2*k + l*bs + j];
395         }
396       }
397     }
398   }
399   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0); CHKERRQ(ierr);
400   PetscFree(aa);
401 
402   ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
403   if (file) {
404     fprintf(file,"-matload_block_size %d\n",a->bs);
405   }
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNC__
410 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
411 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
412 {
413   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
414   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
415   FILE        *fd;
416   char        *outputname;
417 
418   PetscFunctionBegin;
419   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
420   ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr);
421   ierr = ViewerGetFormat(viewer,&format);
422   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
423     ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);
424   } else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
425     SETERRQ(PETSC_ERR_SUP,0,"Socket format not supported");
426   } else if (format == VIEWER_FORMAT_ASCII_COMMON) {
427     for ( i=0; i<a->mbs; i++ ) {
428       for ( j=0; j<bs; j++ ) {
429         fprintf(fd,"row %d:",i*bs+j);
430         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
431           for ( l=0; l<bs; l++ ) {
432 #if defined(USE_PETSC_COMPLEX)
433             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
434               fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
435                       PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));
436             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
437               fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
438                       PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));
439             } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
440               fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));
441             }
442 #else
443             if (a->a[bs2*k + l*bs + j] != 0.0) {
444               fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
445             }
446 #endif
447           }
448         }
449         fprintf(fd,"\n");
450       }
451     }
452   } else {
453     for ( i=0; i<a->mbs; i++ ) {
454       for ( j=0; j<bs; j++ ) {
455         fprintf(fd,"row %d:",i*bs+j);
456         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
457           for ( l=0; l<bs; l++ ) {
458 #if defined(USE_PETSC_COMPLEX)
459             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) {
460               fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
461                 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));
462             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) {
463               fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
464                 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));
465             } else {
466               fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));
467             }
468 #else
469             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
470 #endif
471           }
472         }
473         fprintf(fd,"\n");
474       }
475     }
476   }
477   fflush(fd);
478   PetscFunctionReturn(0);
479 }
480 
481 #undef __FUNC__
482 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom"
483 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa)
484 {
485   Mat          A = (Mat) Aa;
486   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
487   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
488   double       xl,yl,xr,yr,x_l,x_r,y_l,y_r;
489   MatScalar    *aa;
490   MPI_Comm     comm;
491   Viewer       viewer;
492 
493   PetscFunctionBegin;
494   /*
495       This is nasty. If this is called from an originally parallel matrix
496    then all processes call this, but only the first has the matrix so the
497    rest should return immediately.
498   */
499   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
500   MPI_Comm_rank(comm,&rank);
501   if (rank) PetscFunctionReturn(0);
502 
503   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr);
504 
505   ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr); CHKERRQ(ierr);
506 
507   /* loop over matrix elements drawing boxes */
508   color = DRAW_BLUE;
509   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
510     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
511       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
512       x_l = a->j[j]*bs; x_r = x_l + 1.0;
513       aa = a->a + j*bs2;
514       for ( k=0; k<bs; k++ ) {
515         for ( l=0; l<bs; l++ ) {
516           if (PetscReal(*aa++) >=  0.) continue;
517           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
518         }
519       }
520     }
521   }
522   color = DRAW_CYAN;
523   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
524     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
525       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
526       x_l = a->j[j]*bs; x_r = x_l + 1.0;
527       aa = a->a + j*bs2;
528       for ( k=0; k<bs; k++ ) {
529         for ( l=0; l<bs; l++ ) {
530           if (PetscReal(*aa++) != 0.) continue;
531           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
532         }
533       }
534     }
535   }
536 
537   color = DRAW_RED;
538   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
539     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
540       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
541       x_l = a->j[j]*bs; x_r = x_l + 1.0;
542       aa = a->a + j*bs2;
543       for ( k=0; k<bs; k++ ) {
544         for ( l=0; l<bs; l++ ) {
545           if (PetscReal(*aa++) <= 0.) continue;
546           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
547         }
548       }
549     }
550   }
551   PetscFunctionReturn(0);
552 }
553 
554 #undef __FUNC__
555 #define __FUNC__ "MatView_SeqBAIJ_Draw"
556 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
557 {
558   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
559   int          ierr;
560   double       xl,yl,xr,yr,w,h;
561   Draw         draw;
562   PetscTruth   isnull;
563 
564   PetscFunctionBegin;
565 
566   ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
567   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
568 
569   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
570   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
571   xr += w;    yr += h;  xl = -w;     yl = -h;
572   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
573   ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A); CHKERRQ(ierr);
574   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
575   PetscFunctionReturn(0);
576 }
577 
578 #undef __FUNC__
579 #define __FUNC__ "MatView_SeqBAIJ"
580 int MatView_SeqBAIJ(Mat A,Viewer viewer)
581 {
582   ViewerType  vtype;
583   int         ierr;
584 
585   PetscFunctionBegin;
586   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
587   if (PetscTypeCompare(vtype,SOCKET_VIEWER)) {
588     SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported");
589   } else if (PetscTypeCompare(vtype,ASCII_VIEWER)){
590     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
591   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
592     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
593   } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
594     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
595   } else {
596     SETERRQ(1,1,"Viewer type not supported by PETSc object");
597   }
598   PetscFunctionReturn(0);
599 }
600 
601 
602 #undef __FUNC__
603 #define __FUNC__ "MatGetValues_SeqBAIJ"
604 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
605 {
606   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
607   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
608   int        *ai = a->i, *ailen = a->ilen;
609   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
610   MatScalar  *ap, *aa = a->a, zero = 0.0;
611 
612   PetscFunctionBegin;
613   for ( k=0; k<m; k++ ) { /* loop over rows */
614     row  = im[k]; brow = row/bs;
615     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
616     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
617     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
618     nrow = ailen[brow];
619     for ( l=0; l<n; l++ ) { /* loop over columns */
620       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
621       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
622       col  = in[l] ;
623       bcol = col/bs;
624       cidx = col%bs;
625       ridx = row%bs;
626       high = nrow;
627       low  = 0; /* assume unsorted */
628       while (high-low > 5) {
629         t = (low+high)/2;
630         if (rp[t] > bcol) high = t;
631         else             low  = t;
632       }
633       for ( i=low; i<high; i++ ) {
634         if (rp[i] > bcol) break;
635         if (rp[i] == bcol) {
636           *v++ = ap[bs2*i+bs*cidx+ridx];
637           goto finished;
638         }
639       }
640       *v++ = zero;
641       finished:;
642     }
643   }
644   PetscFunctionReturn(0);
645 }
646 
647 
648 #undef __FUNC__
649 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
650 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
651 {
652   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
653   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
654   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented;
655   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval;
656   Scalar      *value = v;
657   MatScalar   *ap,*aa=a->a,*bap;
658 
659   PetscFunctionBegin;
660   if (roworiented) {
661     stepval = (n-1)*bs;
662   } else {
663     stepval = (m-1)*bs;
664   }
665   for ( k=0; k<m; k++ ) { /* loop over added rows */
666     row  = im[k];
667     if (row < 0) continue;
668 #if defined(USE_PETSC_BOPT_g)
669     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
670 #endif
671     rp   = aj + ai[row];
672     ap   = aa + bs2*ai[row];
673     rmax = imax[row];
674     nrow = ailen[row];
675     low  = 0;
676     for ( l=0; l<n; l++ ) { /* loop over added columns */
677       if (in[l] < 0) continue;
678 #if defined(USE_PETSC_BOPT_g)
679       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
680 #endif
681       col = in[l];
682       if (roworiented) {
683         value = v + k*(stepval+bs)*bs + l*bs;
684       } else {
685         value = v + l*(stepval+bs)*bs + k*bs;
686       }
687       if (!sorted) low = 0; high = nrow;
688       while (high-low > 7) {
689         t = (low+high)/2;
690         if (rp[t] > col) high = t;
691         else             low  = t;
692       }
693       for ( i=low; i<high; i++ ) {
694         if (rp[i] > col) break;
695         if (rp[i] == col) {
696           bap  = ap +  bs2*i;
697           if (roworiented) {
698             if (is == ADD_VALUES) {
699               for ( ii=0; ii<bs; ii++,value+=stepval ) {
700                 for (jj=ii; jj<bs2; jj+=bs ) {
701                   bap[jj] += *value++;
702                 }
703               }
704             } else {
705               for ( ii=0; ii<bs; ii++,value+=stepval ) {
706                 for (jj=ii; jj<bs2; jj+=bs ) {
707                   bap[jj] = *value++;
708                 }
709               }
710             }
711           } else {
712             if (is == ADD_VALUES) {
713               for ( ii=0; ii<bs; ii++,value+=stepval ) {
714                 for (jj=0; jj<bs; jj++ ) {
715                   *bap++ += *value++;
716                 }
717               }
718             } else {
719               for ( ii=0; ii<bs; ii++,value+=stepval ) {
720                 for (jj=0; jj<bs; jj++ ) {
721                   *bap++  = *value++;
722                 }
723               }
724             }
725           }
726           goto noinsert2;
727         }
728       }
729       if (nonew == 1) goto noinsert2;
730       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
731       if (nrow >= rmax) {
732         /* there is no extra room in row, therefore enlarge */
733         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
734         MatScalar *new_a;
735 
736         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
737 
738         /* malloc new storage space */
739         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
740         new_a   = (MatScalar *) PetscMalloc( len ); CHKPTRQ(new_a);
741         new_j   = (int *) (new_a + bs2*new_nz);
742         new_i   = new_j + new_nz;
743 
744         /* copy over old data into new slots */
745         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
746         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
747         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
748         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
749         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
750         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
751         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
752         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
753         /* free up old matrix storage */
754         PetscFree(a->a);
755         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
756         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
757         a->singlemalloc = 1;
758 
759         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
760         rmax = imax[row] = imax[row] + CHUNKSIZE;
761         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
762         a->maxnz += bs2*CHUNKSIZE;
763         a->reallocs++;
764         a->nz++;
765       }
766       N = nrow++ - 1;
767       /* shift up all the later entries in this row */
768       for ( ii=N; ii>=i; ii-- ) {
769         rp[ii+1] = rp[ii];
770         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
771       }
772       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
773       rp[i] = col;
774       bap   = ap +  bs2*i;
775       if (roworiented) {
776         for ( ii=0; ii<bs; ii++,value+=stepval) {
777           for (jj=ii; jj<bs2; jj+=bs ) {
778             bap[jj] = *value++;
779           }
780         }
781       } else {
782         for ( ii=0; ii<bs; ii++,value+=stepval ) {
783           for (jj=0; jj<bs; jj++ ) {
784             *bap++  = *value++;
785           }
786         }
787       }
788       noinsert2:;
789       low = i;
790     }
791     ailen[row] = nrow;
792   }
793   PetscFunctionReturn(0);
794 }
795 
796 
797 #undef __FUNC__
798 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
799 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
800 {
801   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
802   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
803   int        m = a->m,*ip, N, *ailen = a->ilen;
804   int        mbs = a->mbs, bs2 = a->bs2,rmax = 0;
805   MatScalar  *aa = a->a, *ap;
806 
807   PetscFunctionBegin;
808   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
809 
810   if (m) rmax = ailen[0];
811   for ( i=1; i<mbs; i++ ) {
812     /* move each row back by the amount of empty slots (fshift) before it*/
813     fshift += imax[i-1] - ailen[i-1];
814     rmax   = PetscMax(rmax,ailen[i]);
815     if (fshift) {
816       ip = aj + ai[i]; ap = aa + bs2*ai[i];
817       N = ailen[i];
818       for ( j=0; j<N; j++ ) {
819         ip[j-fshift] = ip[j];
820         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
821       }
822     }
823     ai[i] = ai[i-1] + ailen[i-1];
824   }
825   if (mbs) {
826     fshift += imax[mbs-1] - ailen[mbs-1];
827     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
828   }
829   /* reset ilen and imax for each row */
830   for ( i=0; i<mbs; i++ ) {
831     ailen[i] = imax[i] = ai[i+1] - ai[i];
832   }
833   a->nz = ai[mbs];
834 
835   /* diagonals may have moved, so kill the diagonal pointers */
836   if (fshift && a->diag) {
837     PetscFree(a->diag);
838     PLogObjectMemory(A,-(m+1)*sizeof(int));
839     a->diag = 0;
840   }
841   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
842            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
843   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
844            a->reallocs);
845   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
846   a->reallocs          = 0;
847   A->info.nz_unneeded  = (double)fshift*bs2;
848 
849   PetscFunctionReturn(0);
850 }
851 
852 
853 
854 /*
855    This function returns an array of flags which indicate the locations of contiguous
856    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
857    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
858    Assume: sizes should be long enough to hold all the values.
859 */
860 #undef __FUNC__
861 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks"
862 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
863 {
864   int i,j,k,row;
865   PetscTruth flg;
866 
867   /*   PetscFunctionBegin;*/
868   for ( i=0,j=0; i<n; j++ ) {
869     row = idx[i];
870     if (row%bs!=0) { /* Not the begining of a block */
871       sizes[j] = 1;
872       i++;
873     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
874       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
875       i++;
876     } else { /* Begining of the block, so check if the complete block exists */
877       flg = PETSC_TRUE;
878       for ( k=1; k<bs; k++ ) {
879         if (row+k != idx[i+k]) { /* break in the block */
880           flg = PETSC_FALSE;
881           break;
882         }
883       }
884       if (flg == PETSC_TRUE) { /* No break in the bs */
885         sizes[j] = bs;
886         i+= bs;
887       } else {
888         sizes[j] = 1;
889         i++;
890       }
891     }
892   }
893   *bs_max = j;
894   PetscFunctionReturn(0);
895 }
896 
897 #undef __FUNC__
898 #define __FUNC__ "MatZeroRows_SeqBAIJ"
899 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
900 {
901   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
902   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
903   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
904   Scalar      zero = 0.0;
905   MatScalar   *aa;
906 
907   PetscFunctionBegin;
908   /* Make a copy of the IS and  sort it */
909   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
910   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
911 
912   /* allocate memory for rows,sizes */
913   rows  = (int*)PetscMalloc((3*is_n+1)*sizeof(int)); CHKPTRQ(rows);
914   sizes = rows + is_n;
915 
916   /* initialize copy IS valurs to rows, and sort them */
917   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
918   ierr = PetscSortInt(is_n,rows); CHKERRQ(ierr);
919   ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max); CHKERRQ(ierr);
920   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
921 
922   for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) {
923     row   = rows[j];
924     if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row);
925     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
926     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
927     if (sizes[i] == bs) {
928       if (diag) {
929         if (baij->ilen[row/bs] > 0) {
930           baij->ilen[row/bs] = 1;
931           baij->j[baij->i[row/bs]] = row/bs;
932           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar)); CHKERRQ(ierr);
933         }
934         /* Now insert all the diagoanl values for this bs */
935         for ( k=0; k<bs; k++ ) {
936           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
937         }
938       } else { /* (!diag) */
939         baij->ilen[row/bs] = 0;
940       } /* end (!diag) */
941     } else { /* (sizes[i] != bs) */
942 #if defined (USE_PETSC_DEBUG)
943       if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1");
944 #endif
945       for ( k=0; k<count; k++ ) {
946         aa[0] = zero;
947         aa+=bs;
948       }
949       if (diag) {
950         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
951       }
952     }
953   }
954 
955   PetscFree(rows);
956   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
957   PetscFunctionReturn(0);
958 }
959 
960 #undef __FUNC__
961 #define __FUNC__ "MatSetValues_SeqBAIJ"
962 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
963 {
964   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
965   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
966   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
967   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
968   int         ridx,cidx,bs2=a->bs2;
969   MatScalar   *ap,value,*aa=a->a,*bap;
970 
971   PetscFunctionBegin;
972   for ( k=0; k<m; k++ ) { /* loop over added rows */
973     row  = im[k]; brow = row/bs;
974     if (row < 0) continue;
975 #if defined(USE_PETSC_BOPT_g)
976     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
977 #endif
978     rp   = aj + ai[brow];
979     ap   = aa + bs2*ai[brow];
980     rmax = imax[brow];
981     nrow = ailen[brow];
982     low  = 0;
983     for ( l=0; l<n; l++ ) { /* loop over added columns */
984       if (in[l] < 0) continue;
985 #if defined(USE_PETSC_BOPT_g)
986       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
987 #endif
988       col = in[l]; bcol = col/bs;
989       ridx = row % bs; cidx = col % bs;
990       if (roworiented) {
991         value = v[l + k*n];
992       } else {
993         value = v[k + l*m];
994       }
995       if (!sorted) low = 0; high = nrow;
996       while (high-low > 7) {
997         t = (low+high)/2;
998         if (rp[t] > bcol) high = t;
999         else              low  = t;
1000       }
1001       for ( i=low; i<high; i++ ) {
1002         if (rp[i] > bcol) break;
1003         if (rp[i] == bcol) {
1004           bap  = ap +  bs2*i + bs*cidx + ridx;
1005           if (is == ADD_VALUES) *bap += value;
1006           else                  *bap  = value;
1007           goto noinsert1;
1008         }
1009       }
1010       if (nonew == 1) goto noinsert1;
1011       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1012       if (nrow >= rmax) {
1013         /* there is no extra room in row, therefore enlarge */
1014         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1015         MatScalar *new_a;
1016 
1017         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1018 
1019         /* Malloc new storage space */
1020         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1021         new_a   = (MatScalar *) PetscMalloc( len ); CHKPTRQ(new_a);
1022         new_j   = (int *) (new_a + bs2*new_nz);
1023         new_i   = new_j + new_nz;
1024 
1025         /* copy over old data into new slots */
1026         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
1027         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1028         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
1029         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1030         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
1031                                                            len*sizeof(int));
1032         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
1033         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1034         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
1035                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
1036         /* free up old matrix storage */
1037         PetscFree(a->a);
1038         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
1039         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1040         a->singlemalloc = 1;
1041 
1042         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1043         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1044         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1045         a->maxnz += bs2*CHUNKSIZE;
1046         a->reallocs++;
1047         a->nz++;
1048       }
1049       N = nrow++ - 1;
1050       /* shift up all the later entries in this row */
1051       for ( ii=N; ii>=i; ii-- ) {
1052         rp[ii+1] = rp[ii];
1053         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1054       }
1055       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1056       rp[i]                      = bcol;
1057       ap[bs2*i + bs*cidx + ridx] = value;
1058       noinsert1:;
1059       low = i;
1060     }
1061     ailen[brow] = nrow;
1062   }
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1067 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1068 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1069 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1070 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1071 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec);
1072 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1073 extern int MatScale_SeqBAIJ(Scalar*,Mat);
1074 extern int MatNorm_SeqBAIJ(Mat,NormType,double *);
1075 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*);
1076 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1077 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1078 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1079 extern int MatZeroEntries_SeqBAIJ(Mat);
1080 
1081 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1082 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1083 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1084 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1085 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1086 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1087 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1088 
1089 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1090 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1091 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1092 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1093 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1094 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1095 
1096 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1097 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1098 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1099 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1100 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1101 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1102 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1103 
1104 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1105 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1106 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1107 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1108 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1109 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1110 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1111 
1112 #undef __FUNC__
1113 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1114 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1115 {
1116   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1117   Mat         outA;
1118   int         ierr;
1119   PetscTruth  row_identity, col_identity;
1120 
1121   PetscFunctionBegin;
1122   if (info && info->fill != 0) SETERRQ(PETSC_ERR_SUP,0,"Only fill=0 supported");
1123   ierr = ISIdentity(row,&row_identity); CHKERRQ(ierr);
1124   ierr = ISIdentity(col,&col_identity); CHKERRQ(ierr);
1125   if (!row_identity || !col_identity) {
1126     SETERRQ(1,1,"Row and column permutations must be identity");
1127   }
1128 
1129   outA          = inA;
1130   inA->factor   = FACTOR_LU;
1131   a->row        = row;
1132   a->col        = col;
1133 
1134   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1135   ierr = ISInvertPermutation(col,&(a->icol)); CHKERRQ(ierr);
1136   PLogObjectParent(inA,a->icol);
1137 
1138   if (!a->solve_work) {
1139     a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1140     PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1141   }
1142 
1143   if (!a->diag) {
1144     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1145   }
1146   /*
1147       Blocksize 4 and 5 have a special faster factorization/solver for ILU(0) factorization
1148     with natural ordering
1149   */
1150   if (a->bs == 4) {
1151     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1152     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1153     PLogInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");
1154   } else if (a->bs == 5) {
1155     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1156     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1157     PLogInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");
1158   }
1159 
1160   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1161 
1162 
1163   PetscFunctionReturn(0);
1164 }
1165 #undef __FUNC__
1166 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1167 int MatPrintHelp_SeqBAIJ(Mat A)
1168 {
1169   static int called = 0;
1170   MPI_Comm   comm = A->comm;
1171 
1172   PetscFunctionBegin;
1173   if (called) {PetscFunctionReturn(0);} else called = 1;
1174   (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1175   (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 EXTERN_C_BEGIN
1180 #undef __FUNC__
1181 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1182 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1183 {
1184   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1185   int         i,nz,n;
1186 
1187   PetscFunctionBegin;
1188   nz = baij->maxnz;
1189   n  = baij->n;
1190   for (i=0; i<nz; i++) {
1191     baij->j[i] = indices[i];
1192   }
1193   baij->nz = nz;
1194   for ( i=0; i<n; i++ ) {
1195     baij->ilen[i] = baij->imax[i];
1196   }
1197 
1198   PetscFunctionReturn(0);
1199 }
1200 EXTERN_C_END
1201 
1202 #undef __FUNC__
1203 #define __FUNC__ "MatSeqBAIJSetColumnIndices"
1204 /*@
1205     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1206        in the matrix.
1207 
1208   Input Parameters:
1209 +  mat - the SeqBAIJ matrix
1210 -  indices - the column indices
1211 
1212   Notes:
1213     This can be called if you have precomputed the nonzero structure of the
1214   matrix and want to provide it to the matrix object to improve the performance
1215   of the MatSetValues() operation.
1216 
1217     You MUST have set the correct numbers of nonzeros per row in the call to
1218   MatCreateSeqBAIJ().
1219 
1220     MUST be called before any calls to MatSetValues();
1221 
1222 @*/
1223 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1224 {
1225   int ierr,(*f)(Mat,int *);
1226 
1227   PetscFunctionBegin;
1228   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1229   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);
1230          CHKERRQ(ierr);
1231   if (f) {
1232     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1233   } else {
1234     SETERRQ(1,1,"Wrong type of matrix to set column indices");
1235   }
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 /* -------------------------------------------------------------------*/
1240 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1241        MatGetRow_SeqBAIJ,
1242        MatRestoreRow_SeqBAIJ,
1243        MatMult_SeqBAIJ_N,
1244        MatMultAdd_SeqBAIJ_N,
1245        MatMultTrans_SeqBAIJ,
1246        MatMultTransAdd_SeqBAIJ,
1247        MatSolve_SeqBAIJ_N,
1248        0,
1249        0,
1250        0,
1251        MatLUFactor_SeqBAIJ,
1252        0,
1253        0,
1254        MatTranspose_SeqBAIJ,
1255        MatGetInfo_SeqBAIJ,
1256        MatEqual_SeqBAIJ,
1257        MatGetDiagonal_SeqBAIJ,
1258        MatDiagonalScale_SeqBAIJ,
1259        MatNorm_SeqBAIJ,
1260        0,
1261        MatAssemblyEnd_SeqBAIJ,
1262        0,
1263        MatSetOption_SeqBAIJ,
1264        MatZeroEntries_SeqBAIJ,
1265        MatZeroRows_SeqBAIJ,
1266        MatLUFactorSymbolic_SeqBAIJ,
1267        MatLUFactorNumeric_SeqBAIJ_N,
1268        0,
1269        0,
1270        MatGetSize_SeqBAIJ,
1271        MatGetSize_SeqBAIJ,
1272        MatGetOwnershipRange_SeqBAIJ,
1273        MatILUFactorSymbolic_SeqBAIJ,
1274        0,
1275        0,
1276        0,
1277        MatDuplicate_SeqBAIJ,
1278        0,
1279        0,
1280        MatILUFactor_SeqBAIJ,
1281        0,
1282        0,
1283        MatGetSubMatrices_SeqBAIJ,
1284        MatIncreaseOverlap_SeqBAIJ,
1285        MatGetValues_SeqBAIJ,
1286        0,
1287        MatPrintHelp_SeqBAIJ,
1288        MatScale_SeqBAIJ,
1289        0,
1290        0,
1291        0,
1292        MatGetBlockSize_SeqBAIJ,
1293        MatGetRowIJ_SeqBAIJ,
1294        MatRestoreRowIJ_SeqBAIJ,
1295        0,
1296        0,
1297        0,
1298        0,
1299        0,
1300        0,
1301        MatSetValuesBlocked_SeqBAIJ,
1302        MatGetSubMatrix_SeqBAIJ,
1303        0,
1304        0,
1305        MatGetMaps_Petsc};
1306 
1307 #undef __FUNC__
1308 #define __FUNC__ "MatCreateSeqBAIJ"
1309 /*@C
1310    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1311    compressed row) format.  For good matrix assembly performance the
1312    user should preallocate the matrix storage by setting the parameter nz
1313    (or the array nzz).  By setting these parameters accurately, performance
1314    during matrix assembly can be increased by more than a factor of 50.
1315 
1316    Collective on MPI_Comm
1317 
1318    Input Parameters:
1319 +  comm - MPI communicator, set to PETSC_COMM_SELF
1320 .  bs - size of block
1321 .  m - number of rows
1322 .  n - number of columns
1323 .  nz - number of block nonzeros per block row (same for all rows)
1324 -  nzz - array containing the number of block nonzeros in the various block rows
1325          (possibly different for each block row) or PETSC_NULL
1326 
1327    Output Parameter:
1328 .  A - the matrix
1329 
1330    Options Database Keys:
1331 .   -mat_no_unroll - uses code that does not unroll the loops in the
1332                      block calculations (much slower)
1333 .    -mat_block_size - size of the blocks to use
1334 
1335    Notes:
1336    The block AIJ format is fully compatible with standard Fortran 77
1337    storage.  That is, the stored row and column indices can begin at
1338    either one (as in Fortran) or zero.  See the users' manual for details.
1339 
1340    Specify the preallocated storage with either nz or nnz (not both).
1341    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1342    allocation.  For additional details, see the users manual chapter on
1343    matrices.
1344 
1345    Level: intermediate
1346 
1347 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1348 @*/
1349 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1350 {
1351   Mat         B;
1352   Mat_SeqBAIJ *b;
1353   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
1354 
1355   PetscFunctionBegin;
1356   MPI_Comm_size(comm,&size);
1357   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1");
1358 
1359   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1360 
1361   if (mbs*bs!=m || nbs*bs!=n) {
1362     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize");
1363   }
1364 
1365   *A = 0;
1366   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView);
1367   PLogObjectCreate(B);
1368   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1369   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1370   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1371   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1372   if (!flg) {
1373     switch (bs) {
1374     case 1:
1375       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1376       B->ops->solve           = MatSolve_SeqBAIJ_1;
1377       B->ops->mult            = MatMult_SeqBAIJ_1;
1378       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1379       break;
1380     case 2:
1381       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1382       B->ops->solve           = MatSolve_SeqBAIJ_2;
1383       B->ops->mult            = MatMult_SeqBAIJ_2;
1384       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1385       break;
1386     case 3:
1387       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1388       B->ops->solve           = MatSolve_SeqBAIJ_3;
1389       B->ops->mult            = MatMult_SeqBAIJ_3;
1390       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1391       break;
1392     case 4:
1393       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1394       B->ops->solve           = MatSolve_SeqBAIJ_4;
1395       B->ops->mult            = MatMult_SeqBAIJ_4;
1396       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1397       break;
1398     case 5:
1399       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1400       B->ops->solve           = MatSolve_SeqBAIJ_5;
1401       B->ops->mult            = MatMult_SeqBAIJ_5;
1402       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1403       break;
1404     case 7:
1405       B->ops->mult            = MatMult_SeqBAIJ_7;
1406       B->ops->solve           = MatSolve_SeqBAIJ_7;
1407       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1408       break;
1409     }
1410   }
1411   B->ops->destroy     = MatDestroy_SeqBAIJ;
1412   B->ops->view        = MatView_SeqBAIJ;
1413   B->factor           = 0;
1414   B->lupivotthreshold = 1.0;
1415   B->mapping          = 0;
1416   b->row              = 0;
1417   b->col              = 0;
1418   b->icol             = 0;
1419   b->reallocs         = 0;
1420 
1421   b->m       = m; B->m = m; B->M = m;
1422   b->n       = n; B->n = n; B->N = n;
1423 
1424   ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr);
1425   ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr);
1426 
1427   b->mbs     = mbs;
1428   b->nbs     = nbs;
1429   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1430   if (nnz == PETSC_NULL) {
1431     if (nz == PETSC_DEFAULT) nz = 5;
1432     else if (nz <= 0)        nz = 1;
1433     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1434     nz = nz*mbs;
1435   } else {
1436     nz = 0;
1437     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1438   }
1439 
1440   /* allocate the matrix space */
1441   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int);
1442   b->a  = (MatScalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1443   PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
1444   b->j  = (int *) (b->a + nz*bs2);
1445   PetscMemzero(b->j,nz*sizeof(int));
1446   b->i  = b->j + nz;
1447   b->singlemalloc = 1;
1448 
1449   b->i[0] = 0;
1450   for (i=1; i<mbs+1; i++) {
1451     b->i[i] = b->i[i-1] + b->imax[i-1];
1452   }
1453 
1454   /* b->ilen will count nonzeros in each block row so far. */
1455   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1456   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1457   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1458 
1459   b->bs               = bs;
1460   b->bs2              = bs2;
1461   b->mbs              = mbs;
1462   b->nz               = 0;
1463   b->maxnz            = nz*bs2;
1464   b->sorted           = 0;
1465   b->roworiented      = 1;
1466   b->nonew            = 0;
1467   b->diag             = 0;
1468   b->solve_work       = 0;
1469   b->mult_work        = 0;
1470   b->spptr            = 0;
1471   B->info.nz_unneeded = (double)b->maxnz;
1472 
1473   *A = B;
1474   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1475   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1476 
1477   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1478                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1479                                      (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1480   PetscFunctionReturn(0);
1481 }
1482 
1483 #undef __FUNC__
1484 #define __FUNC__ "MatDuplicate_SeqBAIJ"
1485 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1486 {
1487   Mat         C;
1488   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1489   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1490 
1491   PetscFunctionBegin;
1492   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix");
1493 
1494   *B = 0;
1495   PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView);
1496   PLogObjectCreate(C);
1497   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1498   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
1499   C->ops->destroy    = MatDestroy_SeqBAIJ;
1500   C->ops->view       = MatView_SeqBAIJ;
1501   C->factor          = A->factor;
1502   c->row             = 0;
1503   c->col             = 0;
1504   c->icol            = 0;
1505   C->assembled       = PETSC_TRUE;
1506 
1507   c->m = C->m   = a->m;
1508   c->n = C->n   = a->n;
1509   C->M          = a->m;
1510   C->N          = a->n;
1511 
1512   c->bs         = a->bs;
1513   c->bs2        = a->bs2;
1514   c->mbs        = a->mbs;
1515   c->nbs        = a->nbs;
1516 
1517   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1518   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1519   for ( i=0; i<mbs; i++ ) {
1520     c->imax[i] = a->imax[i];
1521     c->ilen[i] = a->ilen[i];
1522   }
1523 
1524   /* allocate the matrix space */
1525   c->singlemalloc = 1;
1526   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1527   c->a  = (MatScalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1528   c->j  = (int *) (c->a + nz*bs2);
1529   c->i  = c->j + nz;
1530   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1531   if (mbs > 0) {
1532     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1533     if (cpvalues == MAT_COPY_VALUES) {
1534       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
1535     } else {
1536       PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
1537     }
1538   }
1539 
1540   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1541   c->sorted      = a->sorted;
1542   c->roworiented = a->roworiented;
1543   c->nonew       = a->nonew;
1544 
1545   if (a->diag) {
1546     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1547     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1548     for ( i=0; i<mbs; i++ ) {
1549       c->diag[i] = a->diag[i];
1550     }
1551   } else c->diag        = 0;
1552   c->nz                 = a->nz;
1553   c->maxnz              = a->maxnz;
1554   c->solve_work         = 0;
1555   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1556   c->mult_work          = 0;
1557   *B = C;
1558   ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 #undef __FUNC__
1563 #define __FUNC__ "MatLoad_SeqBAIJ"
1564 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1565 {
1566   Mat_SeqBAIJ  *a;
1567   Mat          B;
1568   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1569   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1570   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1571   int          *masked, nmask,tmp,bs2,ishift;
1572   Scalar       *aa;
1573   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1574 
1575   PetscFunctionBegin;
1576   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1577   bs2  = bs*bs;
1578 
1579   MPI_Comm_size(comm,&size);
1580   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor");
1581   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1582   ierr = PetscBinaryRead(fd,header,4,PETSC_INT); CHKERRQ(ierr);
1583   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object");
1584   M = header[1]; N = header[2]; nz = header[3];
1585 
1586   if (header[3] < 0) {
1587     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ");
1588   }
1589 
1590   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
1591 
1592   /*
1593      This code adds extra rows to make sure the number of rows is
1594     divisible by the blocksize
1595   */
1596   mbs        = M/bs;
1597   extra_rows = bs - M + bs*(mbs);
1598   if (extra_rows == bs) extra_rows = 0;
1599   else                  mbs++;
1600   if (extra_rows) {
1601     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1602   }
1603 
1604   /* read in row lengths */
1605   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1606   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1607   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1608 
1609   /* read in column indices */
1610   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1611   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT); CHKERRQ(ierr);
1612   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1613 
1614   /* loop over row lengths determining block row lengths */
1615   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1616   PetscMemzero(browlengths,mbs*sizeof(int));
1617   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1618   PetscMemzero(mask,mbs*sizeof(int));
1619   masked = mask + mbs;
1620   rowcount = 0; nzcount = 0;
1621   for ( i=0; i<mbs; i++ ) {
1622     nmask = 0;
1623     for ( j=0; j<bs; j++ ) {
1624       kmax = rowlengths[rowcount];
1625       for ( k=0; k<kmax; k++ ) {
1626         tmp = jj[nzcount++]/bs;
1627         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1628       }
1629       rowcount++;
1630     }
1631     browlengths[i] += nmask;
1632     /* zero out the mask elements we set */
1633     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1634   }
1635 
1636   /* create our matrix */
1637   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1638   B = *A;
1639   a = (Mat_SeqBAIJ *) B->data;
1640 
1641   /* set matrix "i" values */
1642   a->i[0] = 0;
1643   for ( i=1; i<= mbs; i++ ) {
1644     a->i[i]      = a->i[i-1] + browlengths[i-1];
1645     a->ilen[i-1] = browlengths[i-1];
1646   }
1647   a->nz         = 0;
1648   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1649 
1650   /* read in nonzero values */
1651   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1652   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR); CHKERRQ(ierr);
1653   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1654 
1655   /* set "a" and "j" values into matrix */
1656   nzcount = 0; jcount = 0;
1657   for ( i=0; i<mbs; i++ ) {
1658     nzcountb = nzcount;
1659     nmask    = 0;
1660     for ( j=0; j<bs; j++ ) {
1661       kmax = rowlengths[i*bs+j];
1662       for ( k=0; k<kmax; k++ ) {
1663         tmp = jj[nzcount++]/bs;
1664 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1665       }
1666       rowcount++;
1667     }
1668     /* sort the masked values */
1669     PetscSortInt(nmask,masked);
1670 
1671     /* set "j" values into matrix */
1672     maskcount = 1;
1673     for ( j=0; j<nmask; j++ ) {
1674       a->j[jcount++]  = masked[j];
1675       mask[masked[j]] = maskcount++;
1676     }
1677     /* set "a" values into matrix */
1678     ishift = bs2*a->i[i];
1679     for ( j=0; j<bs; j++ ) {
1680       kmax = rowlengths[i*bs+j];
1681       for ( k=0; k<kmax; k++ ) {
1682         tmp       = jj[nzcountb]/bs ;
1683         block     = mask[tmp] - 1;
1684         point     = jj[nzcountb] - bs*tmp;
1685         idx       = ishift + bs2*block + j + bs*point;
1686         a->a[idx] = aa[nzcountb++];
1687       }
1688     }
1689     /* zero out the mask elements we set */
1690     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1691   }
1692   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix");
1693 
1694   PetscFree(rowlengths);
1695   PetscFree(browlengths);
1696   PetscFree(aa);
1697   PetscFree(jj);
1698   PetscFree(mask);
1699 
1700   B->assembled = PETSC_TRUE;
1701 
1702   ierr = MatView_Private(B); CHKERRQ(ierr);
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 
1707 
1708