xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 0f5bd95cca42d693ada6d048329ab39533680180)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: baij.c,v 1.184 1999/10/01 21:21:27 bsmith Exp bsmith $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "sys.h"
10 #include "src/mat/impls/baij/seq/baij.h"
11 #include "src/vec/vecimpl.h"
12 #include "src/inline/spops.h"
13 
14 #define CHUNKSIZE  10
15 
16 /*
17      Checks for missing diagonals
18 */
19 #undef __FUNC__
20 #define __FUNC__ "MatMissingDiag_SeqBAIJ"
21 int MatMissingDiag_SeqBAIJ(Mat A)
22 {
23   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
24   int         *diag = a->diag, *jj = a->j,i;
25 
26   PetscFunctionBegin;
27   for ( i=0; i<a->mbs; i++ ) {
28     if (jj[diag[i]] != i) {
29       SETERRQ1(1,1,"Matrix is missing diagonal number %d",i);
30     }
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 #undef __FUNC__
36 #define __FUNC__ "MatMarkDiag_SeqBAIJ"
37 int MatMarkDiag_SeqBAIJ(Mat A)
38 {
39   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
40   int         i,j, *diag, m = a->mbs;
41 
42   PetscFunctionBegin;
43   diag = (int *) PetscMalloc( (m+1)*sizeof(int));CHKPTRQ(diag);
44   PLogObjectMemory(A,(m+1)*sizeof(int));
45   for ( i=0; i<m; i++ ) {
46     diag[i] = a->i[i+1];
47     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
48       if (a->j[j] == i) {
49         diag[i] = j;
50         break;
51       }
52     }
53   }
54   a->diag = diag;
55   PetscFunctionReturn(0);
56 }
57 
58 
59 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
60 
61 #undef __FUNC__
62 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
63 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
64                             PetscTruth *done)
65 {
66   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
67   int         ierr, n = a->mbs,i;
68 
69   PetscFunctionBegin;
70   *nn = n;
71   if (!ia) PetscFunctionReturn(0);
72   if (symmetric) {
73     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
74   } else if (oshift == 1) {
75     /* temporarily add 1 to i and j indices */
76     int nz = a->i[n] + 1;
77     for ( i=0; i<nz; i++ ) a->j[i]++;
78     for ( i=0; i<n+1; i++ ) a->i[i]++;
79     *ia = a->i; *ja = a->j;
80   } else {
81     *ia = a->i; *ja = a->j;
82   }
83 
84   PetscFunctionReturn(0);
85 }
86 
87 #undef __FUNC__
88 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
89 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
90                                 PetscTruth *done)
91 {
92   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
93   int         i,n = a->mbs,ierr;
94 
95   PetscFunctionBegin;
96   if (!ia) PetscFunctionReturn(0);
97   if (symmetric) {
98     ierr = PetscFree(*ia);CHKERRQ(ierr);
99     ierr = PetscFree(*ja);CHKERRQ(ierr);
100   } else if (oshift == 1) {
101     int nz = a->i[n];
102     for ( i=0; i<nz; i++ ) a->j[i]--;
103     for ( i=0; i<n+1; i++ ) a->i[i]--;
104   }
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNC__
109 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
110 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
111 {
112   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
113 
114   PetscFunctionBegin;
115   *bs = baij->bs;
116   PetscFunctionReturn(0);
117 }
118 
119 
120 #undef __FUNC__
121 #define __FUNC__ "MatDestroy_SeqBAIJ"
122 int MatDestroy_SeqBAIJ(Mat A)
123 {
124   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
125   int         ierr;
126 
127   PetscFunctionBegin;
128   if (A->mapping) {
129     ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr);
130   }
131   if (A->bmapping) {
132     ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr);
133   }
134   if (A->rmap) {
135     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
136   }
137   if (A->cmap) {
138     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
139   }
140 #if defined(PETSC_USE_LOG)
141   PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
142 #endif
143   ierr = PetscFree(a->a);CHKERRQ(ierr);
144   if (!a->singlemalloc) {
145     ierr = PetscFree(a->i);CHKERRQ(ierr);
146     ierr = PetscFree(a->j);CHKERRQ(ierr);
147   }
148   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
149   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
150   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
151   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
152   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
153   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
154   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
155   ierr = PetscFree(a);CHKERRQ(ierr);
156   PLogObjectDestroy(A);
157   PetscHeaderDestroy(A);
158   PetscFunctionReturn(0);
159 }
160 
161 #undef __FUNC__
162 #define __FUNC__ "MatSetOption_SeqBAIJ"
163 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
164 {
165   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
166 
167   PetscFunctionBegin;
168   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
169   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
170   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
171   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
172   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
173   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew       = -1;
174   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew       = -2;
175   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
176   else if (op == MAT_ROWS_SORTED ||
177            op == MAT_ROWS_UNSORTED ||
178            op == MAT_SYMMETRIC ||
179            op == MAT_STRUCTURALLY_SYMMETRIC ||
180            op == MAT_YES_NEW_DIAGONALS ||
181            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
182            op == MAT_USE_HASH_TABLE) {
183     PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
184   } else if (op == MAT_NO_NEW_DIAGONALS) {
185     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
186   } else {
187     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
188   }
189   PetscFunctionReturn(0);
190 }
191 
192 
193 #undef __FUNC__
194 #define __FUNC__ "MatGetSize_SeqBAIJ"
195 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
196 {
197   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
198 
199   PetscFunctionBegin;
200   if (m) *m = a->m;
201   if (n) *n = a->n;
202   PetscFunctionReturn(0);
203 }
204 
205 #undef __FUNC__
206 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
207 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
208 {
209   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
210 
211   PetscFunctionBegin;
212   *m = 0; *n = a->m;
213   PetscFunctionReturn(0);
214 }
215 
216 #undef __FUNC__
217 #define __FUNC__ "MatGetRow_SeqBAIJ"
218 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
219 {
220   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ *) A->data;
221   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
222   MatScalar    *aa,*aa_i;
223   Scalar       *v_i;
224 
225   PetscFunctionBegin;
226   bs  = a->bs;
227   ai  = a->i;
228   aj  = a->j;
229   aa  = a->a;
230   bs2 = a->bs2;
231 
232   if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range");
233 
234   bn  = row/bs;   /* Block number */
235   bp  = row % bs; /* Block Position */
236   M   = ai[bn+1] - ai[bn];
237   *nz = bs*M;
238 
239   if (v) {
240     *v = 0;
241     if (*nz) {
242       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) );CHKPTRQ(*v);
243       for ( i=0; i<M; i++ ) { /* for each block in the block row */
244         v_i  = *v + i*bs;
245         aa_i = aa + bs2*(ai[bn] + i);
246         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
247       }
248     }
249   }
250 
251   if (idx) {
252     *idx = 0;
253     if (*nz) {
254       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) );CHKPTRQ(*idx);
255       for ( i=0; i<M; i++ ) { /* for each block in the block row */
256         idx_i = *idx + i*bs;
257         itmp  = bs*aj[ai[bn] + i];
258         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
259       }
260     }
261   }
262   PetscFunctionReturn(0);
263 }
264 
265 #undef __FUNC__
266 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
267 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
268 {
269   int ierr;
270 
271   PetscFunctionBegin;
272   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
273   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNC__
278 #define __FUNC__ "MatTranspose_SeqBAIJ"
279 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
280 {
281   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
282   Mat         C;
283   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
284   int         *rows,*cols,bs2=a->bs2;
285   MatScalar   *array = a->a;
286 
287   PetscFunctionBegin;
288   if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place");
289   col  = (int *) PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col);
290   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
291 
292   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
293   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
294   ierr = PetscFree(col);CHKERRQ(ierr);
295   rows = (int *) PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows);
296   cols = rows + bs;
297   for ( i=0; i<mbs; i++ ) {
298     cols[0] = i*bs;
299     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
300     len = ai[i+1] - ai[i];
301     for ( j=0; j<len; j++ ) {
302       rows[0] = (*aj++)*bs;
303       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
304       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
305       array += bs2;
306     }
307   }
308   ierr = PetscFree(rows);CHKERRQ(ierr);
309 
310   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
311   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
312 
313   if (B != PETSC_NULL) {
314     *B = C;
315   } else {
316     PetscOps *Abops;
317     MatOps   Aops;
318 
319     /* This isn't really an in-place transpose */
320     ierr = PetscFree(a->a);CHKERRQ(ierr);
321     if (!a->singlemalloc) {
322       ierr = PetscFree(a->i);CHKERRQ(ierr);
323       ierr = PetscFree(a->j);CHKERRQ(ierr);
324     }
325     if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
326     if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
327     if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
328     if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
329     ierr = PetscFree(a);CHKERRQ(ierr);
330 
331 
332     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
333     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
334 
335     /*
336        This is horrible, horrible code. We need to keep the
337       A pointers for the bops and ops but copy everything
338       else from C.
339     */
340     Abops    = A->bops;
341     Aops     = A->ops;
342     ierr     = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr);
343     A->bops  = Abops;
344     A->ops   = Aops;
345     A->qlist = 0;
346 
347     PetscHeaderDestroy(C);
348   }
349   PetscFunctionReturn(0);
350 }
351 
352 #undef __FUNC__
353 #define __FUNC__ "MatView_SeqBAIJ_Binary"
354 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
355 {
356   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
357   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
358   Scalar      *aa;
359   FILE        *file;
360 
361   PetscFunctionBegin;
362   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
363   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
364   col_lens[0] = MAT_COOKIE;
365 
366   col_lens[1] = a->m;
367   col_lens[2] = a->n;
368   col_lens[3] = a->nz*bs2;
369 
370   /* store lengths of each row and write (including header) to file */
371   count = 0;
372   for ( i=0; i<a->mbs; i++ ) {
373     for ( j=0; j<bs; j++ ) {
374       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
375     }
376   }
377   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr);
378   ierr = PetscFree(col_lens);CHKERRQ(ierr);
379 
380   /* store column indices (zero start index) */
381   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) );CHKPTRQ(jj);
382   count = 0;
383   for ( i=0; i<a->mbs; i++ ) {
384     for ( j=0; j<bs; j++ ) {
385       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
386         for ( l=0; l<bs; l++ ) {
387           jj[count++] = bs*a->j[k] + l;
388         }
389       }
390     }
391   }
392   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
393   ierr = PetscFree(jj);CHKERRQ(ierr);
394 
395   /* store nonzero values */
396   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa);
397   count = 0;
398   for ( i=0; i<a->mbs; i++ ) {
399     for ( j=0; j<bs; j++ ) {
400       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
401         for ( l=0; l<bs; l++ ) {
402           aa[count++] = a->a[bs2*k + l*bs + j];
403         }
404       }
405     }
406   }
407   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
408   ierr = PetscFree(aa);CHKERRQ(ierr);
409 
410   ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
411   if (file) {
412     fprintf(file,"-matload_block_size %d\n",a->bs);
413   }
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNC__
418 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
419 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
420 {
421   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
422   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
423   FILE        *fd;
424   char        *outputname;
425 
426   PetscFunctionBegin;
427   ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
428   ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr);
429   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
430   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
431     ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
432   } else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
433     SETERRQ(PETSC_ERR_SUP,0,"Socket format not supported");
434   } else if (format == VIEWER_FORMAT_ASCII_COMMON) {
435     for ( i=0; i<a->mbs; i++ ) {
436       for ( j=0; j<bs; j++ ) {
437         fprintf(fd,"row %d:",i*bs+j);
438         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
439           for ( l=0; l<bs; l++ ) {
440 #if defined(PETSC_USE_COMPLEX)
441             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
442               fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
443                       PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));
444             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
445               fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
446                       PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));
447             } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
448               fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));
449             }
450 #else
451             if (a->a[bs2*k + l*bs + j] != 0.0) {
452               fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
453             }
454 #endif
455           }
456         }
457         fprintf(fd,"\n");
458       }
459     }
460   } else {
461     for ( i=0; i<a->mbs; i++ ) {
462       for ( j=0; j<bs; j++ ) {
463         fprintf(fd,"row %d:",i*bs+j);
464         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
465           for ( l=0; l<bs; l++ ) {
466 #if defined(PETSC_USE_COMPLEX)
467             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) {
468               fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
469                 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));
470             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) {
471               fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
472                 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));
473             } else {
474               fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));
475             }
476 #else
477             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
478 #endif
479           }
480         }
481         fprintf(fd,"\n");
482       }
483     }
484   }
485   fflush(fd);
486   PetscFunctionReturn(0);
487 }
488 
489 #undef __FUNC__
490 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom"
491 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa)
492 {
493   Mat          A = (Mat) Aa;
494   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
495   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
496   double       xl,yl,xr,yr,x_l,x_r,y_l,y_r;
497   MatScalar    *aa;
498   MPI_Comm     comm;
499   Viewer       viewer;
500 
501   PetscFunctionBegin;
502   /*
503       This is nasty. If this is called from an originally parallel matrix
504    then all processes call this, but only the first has the matrix so the
505    rest should return immediately.
506   */
507   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
508   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
509   if (rank) PetscFunctionReturn(0);
510 
511   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr);
512 
513   ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
514 
515   /* loop over matrix elements drawing boxes */
516   color = DRAW_BLUE;
517   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
518     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
519       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
520       x_l = a->j[j]*bs; x_r = x_l + 1.0;
521       aa = a->a + j*bs2;
522       for ( k=0; k<bs; k++ ) {
523         for ( l=0; l<bs; l++ ) {
524           if (PetscReal(*aa++) >=  0.) continue;
525           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
526         }
527       }
528     }
529   }
530   color = DRAW_CYAN;
531   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
532     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
533       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
534       x_l = a->j[j]*bs; x_r = x_l + 1.0;
535       aa = a->a + j*bs2;
536       for ( k=0; k<bs; k++ ) {
537         for ( l=0; l<bs; l++ ) {
538           if (PetscReal(*aa++) != 0.) continue;
539           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
540         }
541       }
542     }
543   }
544 
545   color = DRAW_RED;
546   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
547     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
548       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
549       x_l = a->j[j]*bs; x_r = x_l + 1.0;
550       aa = a->a + j*bs2;
551       for ( k=0; k<bs; k++ ) {
552         for ( l=0; l<bs; l++ ) {
553           if (PetscReal(*aa++) <= 0.) continue;
554           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
555         }
556       }
557     }
558   }
559   PetscFunctionReturn(0);
560 }
561 
562 #undef __FUNC__
563 #define __FUNC__ "MatView_SeqBAIJ_Draw"
564 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
565 {
566   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
567   int          ierr;
568   double       xl,yl,xr,yr,w,h;
569   Draw         draw;
570   PetscTruth   isnull;
571 
572   PetscFunctionBegin;
573 
574   ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
575   ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
576 
577   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
578   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
579   xr += w;    yr += h;  xl = -w;     yl = -h;
580   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
581   ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
582   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
583   PetscFunctionReturn(0);
584 }
585 
586 #undef __FUNC__
587 #define __FUNC__ "MatView_SeqBAIJ"
588 int MatView_SeqBAIJ(Mat A,Viewer viewer)
589 {
590   int         ierr;
591   int         issocket,isascii,isbinary,isdraw;
592 
593   PetscFunctionBegin;
594   issocket = PetscTypeCompare(viewer,SOCKET_VIEWER);
595   isascii  = PetscTypeCompare(viewer,ASCII_VIEWER);
596   isbinary = PetscTypeCompare(viewer,BINARY_VIEWER);
597   isdraw   = PetscTypeCompare(viewer,DRAW_VIEWER);
598   if (issocket) {
599     SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported");
600   } else if (isascii){
601     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
602   } else if (isbinary) {
603     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
604   } else if (isdraw) {
605     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
606   } else {
607     SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
608   }
609   PetscFunctionReturn(0);
610 }
611 
612 
613 #undef __FUNC__
614 #define __FUNC__ "MatGetValues_SeqBAIJ"
615 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
616 {
617   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
618   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
619   int        *ai = a->i, *ailen = a->ilen;
620   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
621   MatScalar  *ap, *aa = a->a, zero = 0.0;
622 
623   PetscFunctionBegin;
624   for ( k=0; k<m; k++ ) { /* loop over rows */
625     row  = im[k]; brow = row/bs;
626     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
627     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
628     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
629     nrow = ailen[brow];
630     for ( l=0; l<n; l++ ) { /* loop over columns */
631       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
632       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
633       col  = in[l] ;
634       bcol = col/bs;
635       cidx = col%bs;
636       ridx = row%bs;
637       high = nrow;
638       low  = 0; /* assume unsorted */
639       while (high-low > 5) {
640         t = (low+high)/2;
641         if (rp[t] > bcol) high = t;
642         else             low  = t;
643       }
644       for ( i=low; i<high; i++ ) {
645         if (rp[i] > bcol) break;
646         if (rp[i] == bcol) {
647           *v++ = ap[bs2*i+bs*cidx+ridx];
648           goto finished;
649         }
650       }
651       *v++ = zero;
652       finished:;
653     }
654   }
655   PetscFunctionReturn(0);
656 }
657 
658 
659 #undef __FUNC__
660 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
661 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
662 {
663   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
664   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
665   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented;
666   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
667   Scalar      *value = v;
668   MatScalar   *ap,*aa=a->a,*bap;
669 
670   PetscFunctionBegin;
671   if (roworiented) {
672     stepval = (n-1)*bs;
673   } else {
674     stepval = (m-1)*bs;
675   }
676   for ( k=0; k<m; k++ ) { /* loop over added rows */
677     row  = im[k];
678     if (row < 0) continue;
679 #if defined(PETSC_USE_BOPT_g)
680     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
681 #endif
682     rp   = aj + ai[row];
683     ap   = aa + bs2*ai[row];
684     rmax = imax[row];
685     nrow = ailen[row];
686     low  = 0;
687     for ( l=0; l<n; l++ ) { /* loop over added columns */
688       if (in[l] < 0) continue;
689 #if defined(PETSC_USE_BOPT_g)
690       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
691 #endif
692       col = in[l];
693       if (roworiented) {
694         value = v + k*(stepval+bs)*bs + l*bs;
695       } else {
696         value = v + l*(stepval+bs)*bs + k*bs;
697       }
698       if (!sorted) low = 0; high = nrow;
699       while (high-low > 7) {
700         t = (low+high)/2;
701         if (rp[t] > col) high = t;
702         else             low  = t;
703       }
704       for ( i=low; i<high; i++ ) {
705         if (rp[i] > col) break;
706         if (rp[i] == col) {
707           bap  = ap +  bs2*i;
708           if (roworiented) {
709             if (is == ADD_VALUES) {
710               for ( ii=0; ii<bs; ii++,value+=stepval ) {
711                 for (jj=ii; jj<bs2; jj+=bs ) {
712                   bap[jj] += *value++;
713                 }
714               }
715             } else {
716               for ( ii=0; ii<bs; ii++,value+=stepval ) {
717                 for (jj=ii; jj<bs2; jj+=bs ) {
718                   bap[jj] = *value++;
719                 }
720               }
721             }
722           } else {
723             if (is == ADD_VALUES) {
724               for ( ii=0; ii<bs; ii++,value+=stepval ) {
725                 for (jj=0; jj<bs; jj++ ) {
726                   *bap++ += *value++;
727                 }
728               }
729             } else {
730               for ( ii=0; ii<bs; ii++,value+=stepval ) {
731                 for (jj=0; jj<bs; jj++ ) {
732                   *bap++  = *value++;
733                 }
734               }
735             }
736           }
737           goto noinsert2;
738         }
739       }
740       if (nonew == 1) goto noinsert2;
741       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
742       if (nrow >= rmax) {
743         /* there is no extra room in row, therefore enlarge */
744         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
745         MatScalar *new_a;
746 
747         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
748 
749         /* malloc new storage space */
750         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
751         new_a   = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a);
752         new_j   = (int *) (new_a + bs2*new_nz);
753         new_i   = new_j + new_nz;
754 
755         /* copy over old data into new slots */
756         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
757         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
758         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
759         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
760         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
761         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
762         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
763         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
764         /* free up old matrix storage */
765         ierr = PetscFree(a->a);CHKERRQ(ierr);
766         if (!a->singlemalloc) {
767           ierr = PetscFree(a->i);CHKERRQ(ierr);
768           ierr = PetscFree(a->j);CHKERRQ(ierr);
769         }
770         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
771         a->singlemalloc = 1;
772 
773         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
774         rmax = imax[row] = imax[row] + CHUNKSIZE;
775         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
776         a->maxnz += bs2*CHUNKSIZE;
777         a->reallocs++;
778         a->nz++;
779       }
780       N = nrow++ - 1;
781       /* shift up all the later entries in this row */
782       for ( ii=N; ii>=i; ii-- ) {
783         rp[ii+1] = rp[ii];
784         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
785       }
786       if (N >= i) {
787         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
788       }
789       rp[i] = col;
790       bap   = ap +  bs2*i;
791       if (roworiented) {
792         for ( ii=0; ii<bs; ii++,value+=stepval) {
793           for (jj=ii; jj<bs2; jj+=bs ) {
794             bap[jj] = *value++;
795           }
796         }
797       } else {
798         for ( ii=0; ii<bs; ii++,value+=stepval ) {
799           for (jj=0; jj<bs; jj++ ) {
800             *bap++  = *value++;
801           }
802         }
803       }
804       noinsert2:;
805       low = i;
806     }
807     ailen[row] = nrow;
808   }
809   PetscFunctionReturn(0);
810 }
811 
812 
813 #undef __FUNC__
814 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
815 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
816 {
817   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
818   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
819   int        m = a->m,*ip, N, *ailen = a->ilen;
820   int        mbs = a->mbs, bs2 = a->bs2,rmax = 0,ierr;
821   MatScalar  *aa = a->a, *ap;
822 
823   PetscFunctionBegin;
824   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
825 
826   if (m) rmax = ailen[0];
827   for ( i=1; i<mbs; i++ ) {
828     /* move each row back by the amount of empty slots (fshift) before it*/
829     fshift += imax[i-1] - ailen[i-1];
830     rmax   = PetscMax(rmax,ailen[i]);
831     if (fshift) {
832       ip = aj + ai[i]; ap = aa + bs2*ai[i];
833       N = ailen[i];
834       for ( j=0; j<N; j++ ) {
835         ip[j-fshift] = ip[j];
836         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
837       }
838     }
839     ai[i] = ai[i-1] + ailen[i-1];
840   }
841   if (mbs) {
842     fshift += imax[mbs-1] - ailen[mbs-1];
843     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
844   }
845   /* reset ilen and imax for each row */
846   for ( i=0; i<mbs; i++ ) {
847     ailen[i] = imax[i] = ai[i+1] - ai[i];
848   }
849   a->nz = ai[mbs];
850 
851   /* diagonals may have moved, so kill the diagonal pointers */
852   if (fshift && a->diag) {
853     ierr = PetscFree(a->diag);CHKERRQ(ierr);
854     PLogObjectMemory(A,-(m+1)*sizeof(int));
855     a->diag = 0;
856   }
857   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
858            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
859   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
860            a->reallocs);
861   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
862   a->reallocs          = 0;
863   A->info.nz_unneeded  = (double)fshift*bs2;
864 
865   PetscFunctionReturn(0);
866 }
867 
868 
869 
870 /*
871    This function returns an array of flags which indicate the locations of contiguous
872    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
873    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
874    Assume: sizes should be long enough to hold all the values.
875 */
876 #undef __FUNC__
877 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks"
878 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
879 {
880   int        i,j,k,row;
881   PetscTruth flg;
882 
883   PetscFunctionBegin;
884   for ( i=0,j=0; i<n; j++ ) {
885     row = idx[i];
886     if (row%bs!=0) { /* Not the begining of a block */
887       sizes[j] = 1;
888       i++;
889     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
890       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
891       i++;
892     } else { /* Begining of the block, so check if the complete block exists */
893       flg = PETSC_TRUE;
894       for ( k=1; k<bs; k++ ) {
895         if (row+k != idx[i+k]) { /* break in the block */
896           flg = PETSC_FALSE;
897           break;
898         }
899       }
900       if (flg == PETSC_TRUE) { /* No break in the bs */
901         sizes[j] = bs;
902         i+= bs;
903       } else {
904         sizes[j] = 1;
905         i++;
906       }
907     }
908   }
909   *bs_max = j;
910   PetscFunctionReturn(0);
911 }
912 
913 #undef __FUNC__
914 #define __FUNC__ "MatZeroRows_SeqBAIJ"
915 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
916 {
917   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
918   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
919   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
920   Scalar      zero = 0.0;
921   MatScalar   *aa;
922 
923   PetscFunctionBegin;
924   /* Make a copy of the IS and  sort it */
925   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
926   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
927 
928   /* allocate memory for rows,sizes */
929   rows  = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows);
930   sizes = rows + is_n;
931 
932   /* initialize copy IS valurs to rows, and sort them */
933   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
934   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
935   ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
936   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
937 
938   for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) {
939     row   = rows[j];
940     if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row);
941     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
942     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
943     if (sizes[i] == bs) {
944       if (diag) {
945         if (baij->ilen[row/bs] > 0) {
946           baij->ilen[row/bs] = 1;
947           baij->j[baij->i[row/bs]] = row/bs;
948           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
949         }
950         /* Now insert all the diagoanl values for this bs */
951         for ( k=0; k<bs; k++ ) {
952           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
953         }
954       } else { /* (!diag) */
955         baij->ilen[row/bs] = 0;
956       } /* end (!diag) */
957     } else { /* (sizes[i] != bs) */
958 #if defined (PETSC_USE_DEBUG)
959       if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1");
960 #endif
961       for ( k=0; k<count; k++ ) {
962         aa[0] = zero;
963         aa+=bs;
964       }
965       if (diag) {
966         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
967       }
968     }
969   }
970 
971   ierr = PetscFree(rows);CHKERRQ(ierr);
972   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
973   PetscFunctionReturn(0);
974 }
975 
976 #undef __FUNC__
977 #define __FUNC__ "MatSetValues_SeqBAIJ"
978 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
979 {
980   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
981   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
982   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
983   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
984   int         ridx,cidx,bs2=a->bs2,ierr;
985   MatScalar   *ap,value,*aa=a->a,*bap;
986 
987   PetscFunctionBegin;
988   for ( k=0; k<m; k++ ) { /* loop over added rows */
989     row  = im[k]; brow = row/bs;
990     if (row < 0) continue;
991 #if defined(PETSC_USE_BOPT_g)
992     if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m);
993 #endif
994     rp   = aj + ai[brow];
995     ap   = aa + bs2*ai[brow];
996     rmax = imax[brow];
997     nrow = ailen[brow];
998     low  = 0;
999     for ( l=0; l<n; l++ ) { /* loop over added columns */
1000       if (in[l] < 0) continue;
1001 #if defined(PETSC_USE_BOPT_g)
1002       if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n);
1003 #endif
1004       col = in[l]; bcol = col/bs;
1005       ridx = row % bs; cidx = col % bs;
1006       if (roworiented) {
1007         value = v[l + k*n];
1008       } else {
1009         value = v[k + l*m];
1010       }
1011       if (!sorted) low = 0; high = nrow;
1012       while (high-low > 7) {
1013         t = (low+high)/2;
1014         if (rp[t] > bcol) high = t;
1015         else              low  = t;
1016       }
1017       for ( i=low; i<high; i++ ) {
1018         if (rp[i] > bcol) break;
1019         if (rp[i] == bcol) {
1020           bap  = ap +  bs2*i + bs*cidx + ridx;
1021           if (is == ADD_VALUES) *bap += value;
1022           else                  *bap  = value;
1023           goto noinsert1;
1024         }
1025       }
1026       if (nonew == 1) goto noinsert1;
1027       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1028       if (nrow >= rmax) {
1029         /* there is no extra room in row, therefore enlarge */
1030         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1031         MatScalar *new_a;
1032 
1033         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1034 
1035         /* Malloc new storage space */
1036         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1037         new_a   = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a);
1038         new_j   = (int *) (new_a + bs2*new_nz);
1039         new_i   = new_j + new_nz;
1040 
1041         /* copy over old data into new slots */
1042         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
1043         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1044         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1045         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1046         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1047         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1048         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1049         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1050         /* free up old matrix storage */
1051         ierr = PetscFree(a->a);CHKERRQ(ierr);
1052         if (!a->singlemalloc) {
1053           ierr = PetscFree(a->i);CHKERRQ(ierr);
1054           ierr = PetscFree(a->j);CHKERRQ(ierr);
1055         }
1056         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1057         a->singlemalloc = 1;
1058 
1059         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1060         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1061         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1062         a->maxnz += bs2*CHUNKSIZE;
1063         a->reallocs++;
1064         a->nz++;
1065       }
1066       N = nrow++ - 1;
1067       /* shift up all the later entries in this row */
1068       for ( ii=N; ii>=i; ii-- ) {
1069         rp[ii+1] = rp[ii];
1070         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1071       }
1072       if (N>=i) {
1073         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1074       }
1075       rp[i]                      = bcol;
1076       ap[bs2*i + bs*cidx + ridx] = value;
1077       noinsert1:;
1078       low = i;
1079     }
1080     ailen[brow] = nrow;
1081   }
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1086 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1087 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1088 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1089 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1090 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec);
1091 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1092 extern int MatScale_SeqBAIJ(Scalar*,Mat);
1093 extern int MatNorm_SeqBAIJ(Mat,NormType,double *);
1094 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*);
1095 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1096 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1097 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1098 extern int MatZeroEntries_SeqBAIJ(Mat);
1099 
1100 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1101 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1102 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1103 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1104 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1105 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1106 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
1107 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1108 
1109 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1110 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1111 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1112 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1113 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1114 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1115 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*);
1116 
1117 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1118 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1119 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1120 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1121 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1122 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec);
1123 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1124 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1125 
1126 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1127 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1128 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1129 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1130 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1131 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
1132 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1133 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1134 
1135 #undef __FUNC__
1136 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1137 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1138 {
1139   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1140   Mat         outA;
1141   int         ierr;
1142   PetscTruth  row_identity, col_identity;
1143 
1144   PetscFunctionBegin;
1145   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU");
1146   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1147   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1148   if (!row_identity || !col_identity) {
1149     SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU");
1150   }
1151 
1152   outA          = inA;
1153   inA->factor   = FACTOR_LU;
1154   a->row        = row;
1155   a->col        = col;
1156 
1157   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1158   ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr);
1159   PLogObjectParent(inA,a->icol);
1160 
1161   if (!a->solve_work) {
1162     a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1163     PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1164   }
1165 
1166   if (!a->diag) {
1167     ierr = MatMarkDiag_SeqBAIJ(inA);CHKERRQ(ierr);
1168   }
1169   /*
1170       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1171       for ILU(0) factorization with natural ordering
1172   */
1173   switch (a->bs) {
1174     case 2:
1175     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1176     inA->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1177     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1178     break;
1179   case 3:
1180     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1181     inA->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1182     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1183     break;
1184   case 4:
1185     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1186     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1187     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1188     break;
1189   case 5:
1190     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1191     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1192     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1193     break;
1194   case 6:
1195     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1196     inA->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1197     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1198     break;
1199   case 7:
1200     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1201     inA->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1202     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1203     break;
1204   }
1205 
1206   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1207 
1208   PetscFunctionReturn(0);
1209 }
1210 #undef __FUNC__
1211 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1212 int MatPrintHelp_SeqBAIJ(Mat A)
1213 {
1214   static int called = 0;
1215   MPI_Comm   comm = A->comm;
1216   int        ierr;
1217 
1218   PetscFunctionBegin;
1219   if (called) {PetscFunctionReturn(0);} else called = 1;
1220   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1221   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 EXTERN_C_BEGIN
1226 #undef __FUNC__
1227 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1228 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1229 {
1230   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1231   int         i,nz,n;
1232 
1233   PetscFunctionBegin;
1234   nz = baij->maxnz;
1235   n  = baij->n;
1236   for (i=0; i<nz; i++) {
1237     baij->j[i] = indices[i];
1238   }
1239   baij->nz = nz;
1240   for ( i=0; i<n; i++ ) {
1241     baij->ilen[i] = baij->imax[i];
1242   }
1243 
1244   PetscFunctionReturn(0);
1245 }
1246 EXTERN_C_END
1247 
1248 #undef __FUNC__
1249 #define __FUNC__ "MatSeqBAIJSetColumnIndices"
1250 /*@
1251     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1252        in the matrix.
1253 
1254   Input Parameters:
1255 +  mat - the SeqBAIJ matrix
1256 -  indices - the column indices
1257 
1258   Level: advanced
1259 
1260   Notes:
1261     This can be called if you have precomputed the nonzero structure of the
1262   matrix and want to provide it to the matrix object to improve the performance
1263   of the MatSetValues() operation.
1264 
1265     You MUST have set the correct numbers of nonzeros per row in the call to
1266   MatCreateSeqBAIJ().
1267 
1268     MUST be called before any calls to MatSetValues();
1269 
1270 @*/
1271 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1272 {
1273   int ierr,(*f)(Mat,int *);
1274 
1275   PetscFunctionBegin;
1276   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1277   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr);
1278   if (f) {
1279     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1280   } else {
1281     SETERRQ(1,1,"Wrong type of matrix to set column indices");
1282   }
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 /* -------------------------------------------------------------------*/
1287 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1288        MatGetRow_SeqBAIJ,
1289        MatRestoreRow_SeqBAIJ,
1290        MatMult_SeqBAIJ_N,
1291        MatMultAdd_SeqBAIJ_N,
1292        MatMultTrans_SeqBAIJ,
1293        MatMultTransAdd_SeqBAIJ,
1294        MatSolve_SeqBAIJ_N,
1295        0,
1296        0,
1297        0,
1298        MatLUFactor_SeqBAIJ,
1299        0,
1300        0,
1301        MatTranspose_SeqBAIJ,
1302        MatGetInfo_SeqBAIJ,
1303        MatEqual_SeqBAIJ,
1304        MatGetDiagonal_SeqBAIJ,
1305        MatDiagonalScale_SeqBAIJ,
1306        MatNorm_SeqBAIJ,
1307        0,
1308        MatAssemblyEnd_SeqBAIJ,
1309        0,
1310        MatSetOption_SeqBAIJ,
1311        MatZeroEntries_SeqBAIJ,
1312        MatZeroRows_SeqBAIJ,
1313        MatLUFactorSymbolic_SeqBAIJ,
1314        MatLUFactorNumeric_SeqBAIJ_N,
1315        0,
1316        0,
1317        MatGetSize_SeqBAIJ,
1318        MatGetSize_SeqBAIJ,
1319        MatGetOwnershipRange_SeqBAIJ,
1320        MatILUFactorSymbolic_SeqBAIJ,
1321        0,
1322        0,
1323        0,
1324        MatDuplicate_SeqBAIJ,
1325        0,
1326        0,
1327        MatILUFactor_SeqBAIJ,
1328        0,
1329        0,
1330        MatGetSubMatrices_SeqBAIJ,
1331        MatIncreaseOverlap_SeqBAIJ,
1332        MatGetValues_SeqBAIJ,
1333        0,
1334        MatPrintHelp_SeqBAIJ,
1335        MatScale_SeqBAIJ,
1336        0,
1337        0,
1338        0,
1339        MatGetBlockSize_SeqBAIJ,
1340        MatGetRowIJ_SeqBAIJ,
1341        MatRestoreRowIJ_SeqBAIJ,
1342        0,
1343        0,
1344        0,
1345        0,
1346        0,
1347        0,
1348        MatSetValuesBlocked_SeqBAIJ,
1349        MatGetSubMatrix_SeqBAIJ,
1350        0,
1351        0,
1352        MatGetMaps_Petsc};
1353 
1354 EXTERN_C_BEGIN
1355 #undef __FUNC__
1356 #define __FUNC__ "MatStoreValues_SeqBAIJ"
1357 int MatStoreValues_SeqBAIJ(Mat mat)
1358 {
1359   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1360   int         nz = aij->i[aij->m]*aij->bs*aij->bs2;
1361   int         ierr;
1362 
1363   PetscFunctionBegin;
1364   if (aij->nonew != 1) {
1365     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1366   }
1367 
1368   /* allocate space for values if not already there */
1369   if (!aij->saved_values) {
1370     aij->saved_values = (Scalar *) PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values);
1371   }
1372 
1373   /* copy values over */
1374   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1375   PetscFunctionReturn(0);
1376 }
1377 EXTERN_C_END
1378 
1379 EXTERN_C_BEGIN
1380 #undef __FUNC__
1381 #define __FUNC__ "MatRetrieveValues_SeqBAIJ"
1382 int MatRetrieveValues_SeqBAIJ(Mat mat)
1383 {
1384   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1385   int         nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr;
1386 
1387   PetscFunctionBegin;
1388   if (aij->nonew != 1) {
1389     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1390   }
1391   if (!aij->saved_values) {
1392     SETERRQ(1,1,"Must call MatStoreValues(A);first");
1393   }
1394 
1395   /* copy values over */
1396   ierr = PetscMemcpy(aij->a, aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1397   PetscFunctionReturn(0);
1398 }
1399 EXTERN_C_END
1400 
1401 #undef __FUNC__
1402 #define __FUNC__ "MatCreateSeqBAIJ"
1403 /*@C
1404    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1405    compressed row) format.  For good matrix assembly performance the
1406    user should preallocate the matrix storage by setting the parameter nz
1407    (or the array nnz).  By setting these parameters accurately, performance
1408    during matrix assembly can be increased by more than a factor of 50.
1409 
1410    Collective on MPI_Comm
1411 
1412    Input Parameters:
1413 +  comm - MPI communicator, set to PETSC_COMM_SELF
1414 .  bs - size of block
1415 .  m - number of rows
1416 .  n - number of columns
1417 .  nz - number of block nonzeros per block row (same for all rows)
1418 -  nnz - array containing the number of block nonzeros in the various block rows
1419          (possibly different for each block row) or PETSC_NULL
1420 
1421    Output Parameter:
1422 .  A - the matrix
1423 
1424    Options Database Keys:
1425 .   -mat_no_unroll - uses code that does not unroll the loops in the
1426                      block calculations (much slower)
1427 .    -mat_block_size - size of the blocks to use
1428 
1429    Level: intermediate
1430 
1431    Notes:
1432    The block AIJ format is fully compatible with standard Fortran 77
1433    storage.  That is, the stored row and column indices can begin at
1434    either one (as in Fortran) or zero.  See the users' manual for details.
1435 
1436    Specify the preallocated storage with either nz or nnz (not both).
1437    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1438    allocation.  For additional details, see the users manual chapter on
1439    matrices.
1440 
1441 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1442 @*/
1443 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1444 {
1445   Mat         B;
1446   Mat_SeqBAIJ *b;
1447   int         i,len,ierr,flg,mbs,nbs,bs2,size;
1448 
1449   PetscFunctionBegin;
1450   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1451   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1");
1452 
1453   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1454   mbs  = m/bs;
1455   nbs  = n/bs;
1456   bs2  = bs*bs;
1457 
1458   if (mbs*bs!=m || nbs*bs!=n) {
1459     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize");
1460   }
1461 
1462   if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz);
1463   if (nnz) {
1464     for (i=0; i<mbs; i++) {
1465       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1466     }
1467   }
1468 
1469   *A = 0;
1470   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView);
1471   PLogObjectCreate(B);
1472   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b);
1473   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1474   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1475   ierr    = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1476   if (!flg) {
1477     switch (bs) {
1478     case 1:
1479       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1480       B->ops->solve           = MatSolve_SeqBAIJ_1;
1481       B->ops->mult            = MatMult_SeqBAIJ_1;
1482       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1483       break;
1484     case 2:
1485       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1486       B->ops->solve           = MatSolve_SeqBAIJ_2;
1487       B->ops->mult            = MatMult_SeqBAIJ_2;
1488       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1489       break;
1490     case 3:
1491       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1492       B->ops->solve           = MatSolve_SeqBAIJ_3;
1493       B->ops->mult            = MatMult_SeqBAIJ_3;
1494       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1495       break;
1496     case 4:
1497       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1498       B->ops->solve           = MatSolve_SeqBAIJ_4;
1499       B->ops->mult            = MatMult_SeqBAIJ_4;
1500       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1501       break;
1502     case 5:
1503       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1504       B->ops->solve           = MatSolve_SeqBAIJ_5;
1505       B->ops->mult            = MatMult_SeqBAIJ_5;
1506       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1507       break;
1508     case 6:
1509       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1510       B->ops->solve           = MatSolve_SeqBAIJ_6;
1511       B->ops->mult            = MatMult_SeqBAIJ_6;
1512       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1513       break;
1514     case 7:
1515       B->ops->mult            = MatMult_SeqBAIJ_7;
1516       B->ops->solve           = MatSolve_SeqBAIJ_7;
1517       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1518       break;
1519     }
1520   }
1521   B->ops->destroy     = MatDestroy_SeqBAIJ;
1522   B->ops->view        = MatView_SeqBAIJ;
1523   B->factor           = 0;
1524   B->lupivotthreshold = 1.0;
1525   B->mapping          = 0;
1526   b->row              = 0;
1527   b->col              = 0;
1528   b->icol             = 0;
1529   b->reallocs         = 0;
1530   b->saved_values     = 0;
1531 
1532   b->m       = m; B->m = m; B->M = m;
1533   b->n       = n; B->n = n; B->N = n;
1534 
1535   ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr);
1536   ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr);
1537 
1538   b->mbs     = mbs;
1539   b->nbs     = nbs;
1540   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(b->imax);
1541   if (nnz == PETSC_NULL) {
1542     if (nz == PETSC_DEFAULT) nz = 5;
1543     else if (nz <= 0)        nz = 1;
1544     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1545     nz = nz*mbs;
1546   } else {
1547     nz = 0;
1548     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1549   }
1550 
1551   /* allocate the matrix space */
1552   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int);
1553   b->a  = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a);
1554   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1555   b->j  = (int *) (b->a + nz*bs2);
1556   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1557   b->i  = b->j + nz;
1558   b->singlemalloc = 1;
1559 
1560   b->i[0] = 0;
1561   for (i=1; i<mbs+1; i++) {
1562     b->i[i] = b->i[i-1] + b->imax[i-1];
1563   }
1564 
1565   /* b->ilen will count nonzeros in each block row so far. */
1566   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1567   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1568   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1569 
1570   b->bs               = bs;
1571   b->bs2              = bs2;
1572   b->mbs              = mbs;
1573   b->nz               = 0;
1574   b->maxnz            = nz*bs2;
1575   b->sorted           = 0;
1576   b->roworiented      = 1;
1577   b->nonew            = 0;
1578   b->diag             = 0;
1579   b->solve_work       = 0;
1580   b->mult_work        = 0;
1581   b->spptr            = 0;
1582   B->info.nz_unneeded = (double)b->maxnz;
1583 
1584   *A = B;
1585   ierr = OptionsHasName(PETSC_NULL,"-help", &flg);CHKERRQ(ierr);
1586   if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); }
1587 
1588   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
1589                                      "MatStoreValues_SeqBAIJ",
1590                                      (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1591   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
1592                                      "MatRetrieveValues_SeqBAIJ",
1593                                      (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1594   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1595                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1596                                      (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #undef __FUNC__
1601 #define __FUNC__ "MatDuplicate_SeqBAIJ"
1602 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1603 {
1604   Mat         C;
1605   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1606   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1607 
1608   PetscFunctionBegin;
1609   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix");
1610 
1611   *B = 0;
1612   PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView);
1613   PLogObjectCreate(C);
1614   C->data         = (void *) (c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c);
1615   ierr            = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1616   C->ops->destroy = MatDestroy_SeqBAIJ;
1617   C->ops->view    = MatView_SeqBAIJ;
1618   C->factor       = A->factor;
1619   c->row          = 0;
1620   c->col          = 0;
1621   c->icol         = 0;
1622   c->saved_values = 0;
1623   C->assembled    = PETSC_TRUE;
1624 
1625   c->m = C->m   = a->m;
1626   c->n = C->n   = a->n;
1627   C->M          = a->m;
1628   C->N          = a->n;
1629 
1630   c->bs         = a->bs;
1631   c->bs2        = a->bs2;
1632   c->mbs        = a->mbs;
1633   c->nbs        = a->nbs;
1634 
1635   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax);
1636   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen);
1637   for ( i=0; i<mbs; i++ ) {
1638     c->imax[i] = a->imax[i];
1639     c->ilen[i] = a->ilen[i];
1640   }
1641 
1642   /* allocate the matrix space */
1643   c->singlemalloc = 1;
1644   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1645   c->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(c->a);
1646   c->j = (int *) (c->a + nz*bs2);
1647   c->i = c->j + nz;
1648   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1649   if (mbs > 0) {
1650     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1651     if (cpvalues == MAT_COPY_VALUES) {
1652       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1653     } else {
1654       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1655     }
1656   }
1657 
1658   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1659   c->sorted      = a->sorted;
1660   c->roworiented = a->roworiented;
1661   c->nonew       = a->nonew;
1662 
1663   if (a->diag) {
1664     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(c->diag);
1665     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1666     for ( i=0; i<mbs; i++ ) {
1667       c->diag[i] = a->diag[i];
1668     }
1669   } else c->diag        = 0;
1670   c->nz                 = a->nz;
1671   c->maxnz              = a->maxnz;
1672   c->solve_work         = 0;
1673   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1674   c->mult_work          = 0;
1675   *B = C;
1676   ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 #undef __FUNC__
1681 #define __FUNC__ "MatLoad_SeqBAIJ"
1682 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1683 {
1684   Mat_SeqBAIJ  *a;
1685   Mat          B;
1686   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1687   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1688   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1689   int          *masked, nmask,tmp,bs2,ishift;
1690   Scalar       *aa;
1691   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1692 
1693   PetscFunctionBegin;
1694   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1695   bs2  = bs*bs;
1696 
1697   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1698   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor");
1699   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1700   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1701   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object");
1702   M = header[1]; N = header[2]; nz = header[3];
1703 
1704   if (header[3] < 0) {
1705     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ");
1706   }
1707 
1708   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
1709 
1710   /*
1711      This code adds extra rows to make sure the number of rows is
1712     divisible by the blocksize
1713   */
1714   mbs        = M/bs;
1715   extra_rows = bs - M + bs*(mbs);
1716   if (extra_rows == bs) extra_rows = 0;
1717   else                  mbs++;
1718   if (extra_rows) {
1719     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1720   }
1721 
1722   /* read in row lengths */
1723   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1724   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1725   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1726 
1727   /* read in column indices */
1728   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) );CHKPTRQ(jj);
1729   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1730   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1731 
1732   /* loop over row lengths determining block row lengths */
1733   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1734   ierr        = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1735   mask        = (int *) PetscMalloc( 2*mbs*sizeof(int) );CHKPTRQ(mask);
1736   ierr        = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1737   masked      = mask + mbs;
1738   rowcount    = 0; nzcount = 0;
1739   for ( i=0; i<mbs; i++ ) {
1740     nmask = 0;
1741     for ( j=0; j<bs; j++ ) {
1742       kmax = rowlengths[rowcount];
1743       for ( k=0; k<kmax; k++ ) {
1744         tmp = jj[nzcount++]/bs;
1745         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1746       }
1747       rowcount++;
1748     }
1749     browlengths[i] += nmask;
1750     /* zero out the mask elements we set */
1751     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1752   }
1753 
1754   /* create our matrix */
1755   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1756   B = *A;
1757   a = (Mat_SeqBAIJ *) B->data;
1758 
1759   /* set matrix "i" values */
1760   a->i[0] = 0;
1761   for ( i=1; i<= mbs; i++ ) {
1762     a->i[i]      = a->i[i-1] + browlengths[i-1];
1763     a->ilen[i-1] = browlengths[i-1];
1764   }
1765   a->nz         = 0;
1766   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1767 
1768   /* read in nonzero values */
1769   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1770   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1771   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1772 
1773   /* set "a" and "j" values into matrix */
1774   nzcount = 0; jcount = 0;
1775   for ( i=0; i<mbs; i++ ) {
1776     nzcountb = nzcount;
1777     nmask    = 0;
1778     for ( j=0; j<bs; j++ ) {
1779       kmax = rowlengths[i*bs+j];
1780       for ( k=0; k<kmax; k++ ) {
1781         tmp = jj[nzcount++]/bs;
1782 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1783       }
1784       rowcount++;
1785     }
1786     /* sort the masked values */
1787     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1788 
1789     /* set "j" values into matrix */
1790     maskcount = 1;
1791     for ( j=0; j<nmask; j++ ) {
1792       a->j[jcount++]  = masked[j];
1793       mask[masked[j]] = maskcount++;
1794     }
1795     /* set "a" values into matrix */
1796     ishift = bs2*a->i[i];
1797     for ( j=0; j<bs; j++ ) {
1798       kmax = rowlengths[i*bs+j];
1799       for ( k=0; k<kmax; k++ ) {
1800         tmp       = jj[nzcountb]/bs ;
1801         block     = mask[tmp] - 1;
1802         point     = jj[nzcountb] - bs*tmp;
1803         idx       = ishift + bs2*block + j + bs*point;
1804         a->a[idx] = aa[nzcountb++];
1805       }
1806     }
1807     /* zero out the mask elements we set */
1808     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1809   }
1810   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix");
1811 
1812   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1813   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1814   ierr = PetscFree(aa);CHKERRQ(ierr);
1815   ierr = PetscFree(jj);CHKERRQ(ierr);
1816   ierr = PetscFree(mask);CHKERRQ(ierr);
1817 
1818   B->assembled = PETSC_TRUE;
1819 
1820   ierr = MatView_Private(B);CHKERRQ(ierr);
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 
1825 
1826