xref: /petsc/src/mat/impls/baij/seq/baij.c (revision f8ab66085c45c137c30f0f04fe7fbbcd021b894e)
1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "petscsys.h"                     /*I "petscmat.h" I*/
11 
12 /*  UGLY, ugly, ugly
13    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
14    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
15    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
16    converts the entries into single precision and then calls ..._MatScalar() to put them
17    into the single precision data structures.
18 */
19 #if defined(PETSC_USE_MAT_SINGLE)
20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
21 #else
22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
23 #endif
24 #if defined(PETSC_HAVE_DSCPACK)
25 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);
26 #endif
27 
28 #define CHUNKSIZE  10
29 
30 /*
31      Checks for missing diagonals
32 */
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
35 int MatMissingDiagonal_SeqBAIJ(Mat A)
36 {
37   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
38   int         *diag,*jj = a->j,i,ierr;
39 
40   PetscFunctionBegin;
41   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
42   diag = a->diag;
43   for (i=0; i<a->mbs; i++) {
44     if (jj[diag[i]] != i) {
45       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
46     }
47   }
48   PetscFunctionReturn(0);
49 }
50 
51 #undef __FUNCT__
52 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
53 int MatMarkDiagonal_SeqBAIJ(Mat A)
54 {
55   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
56   int         i,j,*diag,m = a->mbs,ierr;
57 
58   PetscFunctionBegin;
59   if (a->diag) PetscFunctionReturn(0);
60 
61   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
62   PetscLogObjectMemory(A,(m+1)*sizeof(int));
63   for (i=0; i<m; i++) {
64     diag[i] = a->i[i+1];
65     for (j=a->i[i]; j<a->i[i+1]; j++) {
66       if (a->j[j] == i) {
67         diag[i] = j;
68         break;
69       }
70     }
71   }
72   a->diag = diag;
73   PetscFunctionReturn(0);
74 }
75 
76 
77 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
78 
79 #undef __FUNCT__
80 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
81 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
82 {
83   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
84   int         ierr,n = a->mbs,i;
85 
86   PetscFunctionBegin;
87   *nn = n;
88   if (!ia) PetscFunctionReturn(0);
89   if (symmetric) {
90     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
91   } else if (oshift == 1) {
92     /* temporarily add 1 to i and j indices */
93     int nz = a->i[n];
94     for (i=0; i<nz; i++) a->j[i]++;
95     for (i=0; i<n+1; i++) a->i[i]++;
96     *ia = a->i; *ja = a->j;
97   } else {
98     *ia = a->i; *ja = a->j;
99   }
100 
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNCT__
105 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
106 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
107 {
108   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
109   int         i,n = a->mbs,ierr;
110 
111   PetscFunctionBegin;
112   if (!ia) PetscFunctionReturn(0);
113   if (symmetric) {
114     ierr = PetscFree(*ia);CHKERRQ(ierr);
115     ierr = PetscFree(*ja);CHKERRQ(ierr);
116   } else if (oshift == 1) {
117     int nz = a->i[n]-1;
118     for (i=0; i<nz; i++) a->j[i]--;
119     for (i=0; i<n+1; i++) a->i[i]--;
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
126 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
127 {
128   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
129 
130   PetscFunctionBegin;
131   *bs = baij->bs;
132   PetscFunctionReturn(0);
133 }
134 
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatDestroy_SeqBAIJ"
138 int MatDestroy_SeqBAIJ(Mat A)
139 {
140   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
141   int         ierr;
142 
143   PetscFunctionBegin;
144 #if defined(PETSC_USE_LOG)
145   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
146 #endif
147   ierr = PetscFree(a->a);CHKERRQ(ierr);
148   if (!a->singlemalloc) {
149     ierr = PetscFree(a->i);CHKERRQ(ierr);
150     ierr = PetscFree(a->j);CHKERRQ(ierr);
151   }
152   if (a->row) {
153     ierr = ISDestroy(a->row);CHKERRQ(ierr);
154   }
155   if (a->col) {
156     ierr = ISDestroy(a->col);CHKERRQ(ierr);
157   }
158   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
159   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
160   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
161   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
162   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
163   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
164   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
165 #if defined(PETSC_USE_MAT_SINGLE)
166   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
167 #endif
168   ierr = PetscFree(a);CHKERRQ(ierr);
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatSetOption_SeqBAIJ"
174 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
175 {
176   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
177 
178   PetscFunctionBegin;
179   switch (op) {
180   case MAT_ROW_ORIENTED:
181     a->roworiented    = PETSC_TRUE;
182     break;
183   case MAT_COLUMN_ORIENTED:
184     a->roworiented    = PETSC_FALSE;
185     break;
186   case MAT_COLUMNS_SORTED:
187     a->sorted         = PETSC_TRUE;
188     break;
189   case MAT_COLUMNS_UNSORTED:
190     a->sorted         = PETSC_FALSE;
191     break;
192   case MAT_KEEP_ZEROED_ROWS:
193     a->keepzeroedrows = PETSC_TRUE;
194     break;
195   case MAT_NO_NEW_NONZERO_LOCATIONS:
196     a->nonew          = 1;
197     break;
198   case MAT_NEW_NONZERO_LOCATION_ERR:
199     a->nonew          = -1;
200     break;
201   case MAT_NEW_NONZERO_ALLOCATION_ERR:
202     a->nonew          = -2;
203     break;
204   case MAT_YES_NEW_NONZERO_LOCATIONS:
205     a->nonew          = 0;
206     break;
207   case MAT_ROWS_SORTED:
208   case MAT_ROWS_UNSORTED:
209   case MAT_YES_NEW_DIAGONALS:
210   case MAT_IGNORE_OFF_PROC_ENTRIES:
211   case MAT_USE_HASH_TABLE:
212     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
213     break;
214   case MAT_NO_NEW_DIAGONALS:
215     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
216   case MAT_USE_SINGLE_PRECISION_SOLVES:
217     if (a->bs==4) {
218       PetscTruth sse_enabled_local,sse_enabled_global;
219       int        ierr;
220 
221       sse_enabled_local  = PETSC_FALSE;
222       sse_enabled_global = PETSC_FALSE;
223 
224       ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr);
225 #if defined(PETSC_HAVE_SSE)
226       if (sse_enabled_local) {
227           a->single_precision_solves = PETSC_TRUE;
228           A->ops->solve              = MatSolve_SeqBAIJ_Update;
229           A->ops->solvetranspose     = MatSolveTranspose_SeqBAIJ_Update;
230           PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n");
231           break;
232       } else {
233         PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
234       }
235 #else
236       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
237 #endif
238     }
239     break;
240   default:
241     SETERRQ(PETSC_ERR_SUP,"unknown option");
242   }
243   PetscFunctionReturn(0);
244 }
245 
246 #undef __FUNCT__
247 #define __FUNCT__ "MatGetRow_SeqBAIJ"
248 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
249 {
250   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
251   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
252   MatScalar    *aa,*aa_i;
253   PetscScalar  *v_i;
254 
255   PetscFunctionBegin;
256   bs  = a->bs;
257   ai  = a->i;
258   aj  = a->j;
259   aa  = a->a;
260   bs2 = a->bs2;
261 
262   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
263 
264   bn  = row/bs;   /* Block number */
265   bp  = row % bs; /* Block Position */
266   M   = ai[bn+1] - ai[bn];
267   *nz = bs*M;
268 
269   if (v) {
270     *v = 0;
271     if (*nz) {
272       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
273       for (i=0; i<M; i++) { /* for each block in the block row */
274         v_i  = *v + i*bs;
275         aa_i = aa + bs2*(ai[bn] + i);
276         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
277       }
278     }
279   }
280 
281   if (idx) {
282     *idx = 0;
283     if (*nz) {
284       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
285       for (i=0; i<M; i++) { /* for each block in the block row */
286         idx_i = *idx + i*bs;
287         itmp  = bs*aj[ai[bn] + i];
288         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
289       }
290     }
291   }
292   PetscFunctionReturn(0);
293 }
294 
295 #undef __FUNCT__
296 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
297 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
298 {
299   int ierr;
300 
301   PetscFunctionBegin;
302   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
303   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
304   PetscFunctionReturn(0);
305 }
306 
307 #undef __FUNCT__
308 #define __FUNCT__ "MatTranspose_SeqBAIJ"
309 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
310 {
311   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
312   Mat         C;
313   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
314   int         *rows,*cols,bs2=a->bs2;
315   PetscScalar *array;
316 
317   PetscFunctionBegin;
318   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
319   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
320   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
321 
322 #if defined(PETSC_USE_MAT_SINGLE)
323   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
324   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
325 #else
326   array = a->a;
327 #endif
328 
329   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
330   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
331   ierr = PetscFree(col);CHKERRQ(ierr);
332   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
333   cols = rows + bs;
334   for (i=0; i<mbs; i++) {
335     cols[0] = i*bs;
336     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
337     len = ai[i+1] - ai[i];
338     for (j=0; j<len; j++) {
339       rows[0] = (*aj++)*bs;
340       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
341       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
342       array += bs2;
343     }
344   }
345   ierr = PetscFree(rows);CHKERRQ(ierr);
346 #if defined(PETSC_USE_MAT_SINGLE)
347   ierr = PetscFree(array);
348 #endif
349 
350   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
351   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
352 
353   if (B) {
354     *B = C;
355   } else {
356     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
357   }
358   PetscFunctionReturn(0);
359 }
360 
361 #undef __FUNCT__
362 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
363 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
364 {
365   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
366   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
367   PetscScalar *aa;
368   FILE        *file;
369 
370   PetscFunctionBegin;
371   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
372   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
373   col_lens[0] = MAT_FILE_COOKIE;
374 
375   col_lens[1] = A->m;
376   col_lens[2] = A->n;
377   col_lens[3] = a->nz*bs2;
378 
379   /* store lengths of each row and write (including header) to file */
380   count = 0;
381   for (i=0; i<a->mbs; i++) {
382     for (j=0; j<bs; j++) {
383       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
384     }
385   }
386   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
387   ierr = PetscFree(col_lens);CHKERRQ(ierr);
388 
389   /* store column indices (zero start index) */
390   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
391   count = 0;
392   for (i=0; i<a->mbs; i++) {
393     for (j=0; j<bs; j++) {
394       for (k=a->i[i]; k<a->i[i+1]; k++) {
395         for (l=0; l<bs; l++) {
396           jj[count++] = bs*a->j[k] + l;
397         }
398       }
399     }
400   }
401   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
402   ierr = PetscFree(jj);CHKERRQ(ierr);
403 
404   /* store nonzero values */
405   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
406   count = 0;
407   for (i=0; i<a->mbs; i++) {
408     for (j=0; j<bs; j++) {
409       for (k=a->i[i]; k<a->i[i+1]; k++) {
410         for (l=0; l<bs; l++) {
411           aa[count++] = a->a[bs2*k + l*bs + j];
412         }
413       }
414     }
415   }
416   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
417   ierr = PetscFree(aa);CHKERRQ(ierr);
418 
419   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
420   if (file) {
421     fprintf(file,"-matload_block_size %d\n",a->bs);
422   }
423   PetscFunctionReturn(0);
424 }
425 
426 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer);
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
430 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
431 {
432   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
433   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
434   PetscViewerFormat format;
435 
436   PetscFunctionBegin;
437   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
438   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
439     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
440   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
441     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
442   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
443 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
444      ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr);
445 #endif
446      PetscFunctionReturn(0);
447   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
448     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
449     for (i=0; i<a->mbs; i++) {
450       for (j=0; j<bs; j++) {
451         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
452         for (k=a->i[i]; k<a->i[i+1]; k++) {
453           for (l=0; l<bs; l++) {
454 #if defined(PETSC_USE_COMPLEX)
455             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
456               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
457                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
458             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
459               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
460                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
461             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
462               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
463             }
464 #else
465             if (a->a[bs2*k + l*bs + j] != 0.0) {
466               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
467             }
468 #endif
469           }
470         }
471         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
472       }
473     }
474     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
475   } else {
476     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
477     for (i=0; i<a->mbs; i++) {
478       for (j=0; j<bs; j++) {
479         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
480         for (k=a->i[i]; k<a->i[i+1]; k++) {
481           for (l=0; l<bs; l++) {
482 #if defined(PETSC_USE_COMPLEX)
483             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
484               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
485                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
486             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
487               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
488                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
489             } else {
490               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
491             }
492 #else
493             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
494 #endif
495           }
496         }
497         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
498       }
499     }
500     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
501   }
502   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
503   PetscFunctionReturn(0);
504 }
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
508 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
509 {
510   Mat          A = (Mat) Aa;
511   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
512   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
513   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
514   MatScalar    *aa;
515   PetscViewer  viewer;
516 
517   PetscFunctionBegin;
518 
519   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
520   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
521 
522   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
523 
524   /* loop over matrix elements drawing boxes */
525   color = PETSC_DRAW_BLUE;
526   for (i=0,row=0; i<mbs; i++,row+=bs) {
527     for (j=a->i[i]; j<a->i[i+1]; j++) {
528       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
529       x_l = a->j[j]*bs; x_r = x_l + 1.0;
530       aa = a->a + j*bs2;
531       for (k=0; k<bs; k++) {
532         for (l=0; l<bs; l++) {
533           if (PetscRealPart(*aa++) >=  0.) continue;
534           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
535         }
536       }
537     }
538   }
539   color = PETSC_DRAW_CYAN;
540   for (i=0,row=0; i<mbs; i++,row+=bs) {
541     for (j=a->i[i]; j<a->i[i+1]; j++) {
542       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
543       x_l = a->j[j]*bs; x_r = x_l + 1.0;
544       aa = a->a + j*bs2;
545       for (k=0; k<bs; k++) {
546         for (l=0; l<bs; l++) {
547           if (PetscRealPart(*aa++) != 0.) continue;
548           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
549         }
550       }
551     }
552   }
553 
554   color = PETSC_DRAW_RED;
555   for (i=0,row=0; i<mbs; i++,row+=bs) {
556     for (j=a->i[i]; j<a->i[i+1]; j++) {
557       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
558       x_l = a->j[j]*bs; x_r = x_l + 1.0;
559       aa = a->a + j*bs2;
560       for (k=0; k<bs; k++) {
561         for (l=0; l<bs; l++) {
562           if (PetscRealPart(*aa++) <= 0.) continue;
563           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
564         }
565       }
566     }
567   }
568   PetscFunctionReturn(0);
569 }
570 
571 #undef __FUNCT__
572 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
573 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
574 {
575   int          ierr;
576   PetscReal    xl,yl,xr,yr,w,h;
577   PetscDraw    draw;
578   PetscTruth   isnull;
579 
580   PetscFunctionBegin;
581 
582   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
583   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
584 
585   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
586   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
587   xr += w;    yr += h;  xl = -w;     yl = -h;
588   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
589   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
590   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
591   PetscFunctionReturn(0);
592 }
593 
594 #undef __FUNCT__
595 #define __FUNCT__ "MatView_SeqBAIJ"
596 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
597 {
598   int        ierr;
599   PetscTruth issocket,isascii,isbinary,isdraw;
600 
601   PetscFunctionBegin;
602   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
603   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
604   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
605   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
606   if (issocket) {
607     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
608   } else if (isascii){
609     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
610   } else if (isbinary) {
611     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
612   } else if (isdraw) {
613     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
614   } else {
615     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
616   }
617   PetscFunctionReturn(0);
618 }
619 
620 
621 #undef __FUNCT__
622 #define __FUNCT__ "MatGetValues_SeqBAIJ"
623 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
624 {
625   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
626   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
627   int        *ai = a->i,*ailen = a->ilen;
628   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
629   MatScalar  *ap,*aa = a->a,zero = 0.0;
630 
631   PetscFunctionBegin;
632   for (k=0; k<m; k++) { /* loop over rows */
633     row  = im[k]; brow = row/bs;
634     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
635     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
636     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
637     nrow = ailen[brow];
638     for (l=0; l<n; l++) { /* loop over columns */
639       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
640       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
641       col  = in[l] ;
642       bcol = col/bs;
643       cidx = col%bs;
644       ridx = row%bs;
645       high = nrow;
646       low  = 0; /* assume unsorted */
647       while (high-low > 5) {
648         t = (low+high)/2;
649         if (rp[t] > bcol) high = t;
650         else             low  = t;
651       }
652       for (i=low; i<high; i++) {
653         if (rp[i] > bcol) break;
654         if (rp[i] == bcol) {
655           *v++ = ap[bs2*i+bs*cidx+ridx];
656           goto finished;
657         }
658       }
659       *v++ = zero;
660       finished:;
661     }
662   }
663   PetscFunctionReturn(0);
664 }
665 
666 #if defined(PETSC_USE_MAT_SINGLE)
667 #undef __FUNCT__
668 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
669 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
670 {
671   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
672   int         ierr,i,N = m*n*b->bs2;
673   MatScalar   *vsingle;
674 
675   PetscFunctionBegin;
676   if (N > b->setvalueslen) {
677     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
678     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
679     b->setvalueslen  = N;
680   }
681   vsingle = b->setvaluescopy;
682   for (i=0; i<N; i++) {
683     vsingle[i] = v[i];
684   }
685   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
686   PetscFunctionReturn(0);
687 }
688 #endif
689 
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
693 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
694 {
695   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
696   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
697   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
698   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
699   PetscTruth  roworiented=a->roworiented;
700   MatScalar   *value = v,*ap,*aa = a->a,*bap;
701 
702   PetscFunctionBegin;
703   if (roworiented) {
704     stepval = (n-1)*bs;
705   } else {
706     stepval = (m-1)*bs;
707   }
708   for (k=0; k<m; k++) { /* loop over added rows */
709     row  = im[k];
710     if (row < 0) continue;
711 #if defined(PETSC_USE_BOPT_g)
712     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
713 #endif
714     rp   = aj + ai[row];
715     ap   = aa + bs2*ai[row];
716     rmax = imax[row];
717     nrow = ailen[row];
718     low  = 0;
719     for (l=0; l<n; l++) { /* loop over added columns */
720       if (in[l] < 0) continue;
721 #if defined(PETSC_USE_BOPT_g)
722       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
723 #endif
724       col = in[l];
725       if (roworiented) {
726         value = v + k*(stepval+bs)*bs + l*bs;
727       } else {
728         value = v + l*(stepval+bs)*bs + k*bs;
729       }
730       if (!sorted) low = 0; high = nrow;
731       while (high-low > 7) {
732         t = (low+high)/2;
733         if (rp[t] > col) high = t;
734         else             low  = t;
735       }
736       for (i=low; i<high; i++) {
737         if (rp[i] > col) break;
738         if (rp[i] == col) {
739           bap  = ap +  bs2*i;
740           if (roworiented) {
741             if (is == ADD_VALUES) {
742               for (ii=0; ii<bs; ii++,value+=stepval) {
743                 for (jj=ii; jj<bs2; jj+=bs) {
744                   bap[jj] += *value++;
745                 }
746               }
747             } else {
748               for (ii=0; ii<bs; ii++,value+=stepval) {
749                 for (jj=ii; jj<bs2; jj+=bs) {
750                   bap[jj] = *value++;
751                 }
752               }
753             }
754           } else {
755             if (is == ADD_VALUES) {
756               for (ii=0; ii<bs; ii++,value+=stepval) {
757                 for (jj=0; jj<bs; jj++) {
758                   *bap++ += *value++;
759                 }
760               }
761             } else {
762               for (ii=0; ii<bs; ii++,value+=stepval) {
763                 for (jj=0; jj<bs; jj++) {
764                   *bap++  = *value++;
765                 }
766               }
767             }
768           }
769           goto noinsert2;
770         }
771       }
772       if (nonew == 1) goto noinsert2;
773       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
774       if (nrow >= rmax) {
775         /* there is no extra room in row, therefore enlarge */
776         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
777         MatScalar *new_a;
778 
779         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
780 
781         /* malloc new storage space */
782         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
783 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
784         new_j   = (int*)(new_a + bs2*new_nz);
785         new_i   = new_j + new_nz;
786 
787         /* copy over old data into new slots */
788         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
789         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
790         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
791         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
792         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
793         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
794         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
795         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
796         /* free up old matrix storage */
797         ierr = PetscFree(a->a);CHKERRQ(ierr);
798         if (!a->singlemalloc) {
799           ierr = PetscFree(a->i);CHKERRQ(ierr);
800           ierr = PetscFree(a->j);CHKERRQ(ierr);
801         }
802         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
803         a->singlemalloc = PETSC_TRUE;
804 
805         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
806         rmax = imax[row] = imax[row] + CHUNKSIZE;
807         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
808         a->maxnz += bs2*CHUNKSIZE;
809         a->reallocs++;
810         a->nz++;
811       }
812       N = nrow++ - 1;
813       /* shift up all the later entries in this row */
814       for (ii=N; ii>=i; ii--) {
815         rp[ii+1] = rp[ii];
816         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
817       }
818       if (N >= i) {
819         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
820       }
821       rp[i] = col;
822       bap   = ap +  bs2*i;
823       if (roworiented) {
824         for (ii=0; ii<bs; ii++,value+=stepval) {
825           for (jj=ii; jj<bs2; jj+=bs) {
826             bap[jj] = *value++;
827           }
828         }
829       } else {
830         for (ii=0; ii<bs; ii++,value+=stepval) {
831           for (jj=0; jj<bs; jj++) {
832             *bap++  = *value++;
833           }
834         }
835       }
836       noinsert2:;
837       low = i;
838     }
839     ailen[row] = nrow;
840   }
841   PetscFunctionReturn(0);
842 }
843 
844 /* EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); */
845 #undef __FUNCT__
846 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
847 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
848 {
849   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
850   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
851   int        m = A->m,*ip,N,*ailen = a->ilen;
852   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
853   MatScalar  *aa = a->a,*ap;
854 #if defined(PETSC_HAVE_DSCPACK)
855   PetscTruth   flag;
856 #endif
857 
858   PetscFunctionBegin;
859   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
860 
861   if (m) rmax = ailen[0];
862   for (i=1; i<mbs; i++) {
863     /* move each row back by the amount of empty slots (fshift) before it*/
864     fshift += imax[i-1] - ailen[i-1];
865     rmax   = PetscMax(rmax,ailen[i]);
866     if (fshift) {
867       ip = aj + ai[i]; ap = aa + bs2*ai[i];
868       N = ailen[i];
869       for (j=0; j<N; j++) {
870         ip[j-fshift] = ip[j];
871         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
872       }
873     }
874     ai[i] = ai[i-1] + ailen[i-1];
875   }
876   if (mbs) {
877     fshift += imax[mbs-1] - ailen[mbs-1];
878     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
879   }
880   /* reset ilen and imax for each row */
881   for (i=0; i<mbs; i++) {
882     ailen[i] = imax[i] = ai[i+1] - ai[i];
883   }
884   a->nz = ai[mbs];
885 
886   /* diagonals may have moved, so kill the diagonal pointers */
887   if (fshift && a->diag) {
888     ierr = PetscFree(a->diag);CHKERRQ(ierr);
889     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
890     a->diag = 0;
891   }
892   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
893   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
894   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
895   a->reallocs          = 0;
896   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
897 #if defined(PETSC_HAVE_DSCPACK)
898   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr);
899   if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); }
900 #endif
901 
902 
903   PetscFunctionReturn(0);
904 }
905 
906 
907 
908 /*
909    This function returns an array of flags which indicate the locations of contiguous
910    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
911    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
912    Assume: sizes should be long enough to hold all the values.
913 */
914 #undef __FUNCT__
915 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
916 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
917 {
918   int        i,j,k,row;
919   PetscTruth flg;
920 
921   PetscFunctionBegin;
922   for (i=0,j=0; i<n; j++) {
923     row = idx[i];
924     if (row%bs!=0) { /* Not the begining of a block */
925       sizes[j] = 1;
926       i++;
927     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
928       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
929       i++;
930     } else { /* Begining of the block, so check if the complete block exists */
931       flg = PETSC_TRUE;
932       for (k=1; k<bs; k++) {
933         if (row+k != idx[i+k]) { /* break in the block */
934           flg = PETSC_FALSE;
935           break;
936         }
937       }
938       if (flg == PETSC_TRUE) { /* No break in the bs */
939         sizes[j] = bs;
940         i+= bs;
941       } else {
942         sizes[j] = 1;
943         i++;
944       }
945     }
946   }
947   *bs_max = j;
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
953 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag)
954 {
955   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
956   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
957   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
958   PetscScalar zero = 0.0;
959   MatScalar   *aa;
960 
961   PetscFunctionBegin;
962   /* Make a copy of the IS and  sort it */
963   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
964   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
965 
966   /* allocate memory for rows,sizes */
967   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
968   sizes = rows + is_n;
969 
970   /* copy IS values to rows, and sort them */
971   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
972   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
973   if (baij->keepzeroedrows) {
974     for (i=0; i<is_n; i++) { sizes[i] = 1; }
975     bs_max = is_n;
976   } else {
977     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
978   }
979   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
980 
981   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
982     row   = rows[j];
983     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
984     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
985     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
986     if (sizes[i] == bs && !baij->keepzeroedrows) {
987       if (diag) {
988         if (baij->ilen[row/bs] > 0) {
989           baij->ilen[row/bs]       = 1;
990           baij->j[baij->i[row/bs]] = row/bs;
991           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
992         }
993         /* Now insert all the diagonal values for this bs */
994         for (k=0; k<bs; k++) {
995           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
996         }
997       } else { /* (!diag) */
998         baij->ilen[row/bs] = 0;
999       } /* end (!diag) */
1000     } else { /* (sizes[i] != bs) */
1001 #if defined (PETSC_USE_DEBUG)
1002       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1003 #endif
1004       for (k=0; k<count; k++) {
1005         aa[0] =  zero;
1006         aa    += bs;
1007       }
1008       if (diag) {
1009         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1010       }
1011     }
1012   }
1013 
1014   ierr = PetscFree(rows);CHKERRQ(ierr);
1015   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 #undef __FUNCT__
1020 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1021 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
1022 {
1023   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1024   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1025   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1026   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1027   int         ridx,cidx,bs2=a->bs2,ierr;
1028   PetscTruth  roworiented=a->roworiented;
1029   MatScalar   *ap,value,*aa=a->a,*bap;
1030 
1031   PetscFunctionBegin;
1032   for (k=0; k<m; k++) { /* loop over added rows */
1033     row  = im[k]; brow = row/bs;
1034     if (row < 0) continue;
1035 #if defined(PETSC_USE_BOPT_g)
1036     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1037 #endif
1038     rp   = aj + ai[brow];
1039     ap   = aa + bs2*ai[brow];
1040     rmax = imax[brow];
1041     nrow = ailen[brow];
1042     low  = 0;
1043     for (l=0; l<n; l++) { /* loop over added columns */
1044       if (in[l] < 0) continue;
1045 #if defined(PETSC_USE_BOPT_g)
1046       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1047 #endif
1048       col = in[l]; bcol = col/bs;
1049       ridx = row % bs; cidx = col % bs;
1050       if (roworiented) {
1051         value = v[l + k*n];
1052       } else {
1053         value = v[k + l*m];
1054       }
1055       if (!sorted) low = 0; high = nrow;
1056       while (high-low > 7) {
1057         t = (low+high)/2;
1058         if (rp[t] > bcol) high = t;
1059         else              low  = t;
1060       }
1061       for (i=low; i<high; i++) {
1062         if (rp[i] > bcol) break;
1063         if (rp[i] == bcol) {
1064           bap  = ap +  bs2*i + bs*cidx + ridx;
1065           if (is == ADD_VALUES) *bap += value;
1066           else                  *bap  = value;
1067           goto noinsert1;
1068         }
1069       }
1070       if (nonew == 1) goto noinsert1;
1071       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1072       if (nrow >= rmax) {
1073         /* there is no extra room in row, therefore enlarge */
1074         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1075         MatScalar *new_a;
1076 
1077         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1078 
1079         /* Malloc new storage space */
1080         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1081 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1082         new_j   = (int*)(new_a + bs2*new_nz);
1083         new_i   = new_j + new_nz;
1084 
1085         /* copy over old data into new slots */
1086         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1087         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1088         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1089         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1090         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1091         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1092         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1093         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1094         /* free up old matrix storage */
1095         ierr = PetscFree(a->a);CHKERRQ(ierr);
1096         if (!a->singlemalloc) {
1097           ierr = PetscFree(a->i);CHKERRQ(ierr);
1098           ierr = PetscFree(a->j);CHKERRQ(ierr);
1099         }
1100         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1101         a->singlemalloc = PETSC_TRUE;
1102 
1103         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1104         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1105         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1106         a->maxnz += bs2*CHUNKSIZE;
1107         a->reallocs++;
1108         a->nz++;
1109       }
1110       N = nrow++ - 1;
1111       /* shift up all the later entries in this row */
1112       for (ii=N; ii>=i; ii--) {
1113         rp[ii+1] = rp[ii];
1114         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1115       }
1116       if (N>=i) {
1117         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1118       }
1119       rp[i]                      = bcol;
1120       ap[bs2*i + bs*cidx + ridx] = value;
1121       noinsert1:;
1122       low = i;
1123     }
1124     ailen[brow] = nrow;
1125   }
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1132 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1133 {
1134   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1135   Mat         outA;
1136   int         ierr;
1137   PetscTruth  row_identity,col_identity;
1138 
1139   PetscFunctionBegin;
1140   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1141   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1142   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1143   if (!row_identity || !col_identity) {
1144     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1145   }
1146 
1147   outA          = inA;
1148   inA->factor   = FACTOR_LU;
1149 
1150   if (!a->diag) {
1151     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1152   }
1153 
1154   a->row        = row;
1155   a->col        = col;
1156   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1157   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1158 
1159   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1160   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1161   PetscLogObjectParent(inA,a->icol);
1162 
1163   /*
1164       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1165       for ILU(0) factorization with natural ordering
1166   */
1167   if (a->bs < 8) {
1168     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1169   } else {
1170     if (!a->solve_work) {
1171       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1172       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1173     }
1174   }
1175 
1176   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1177 
1178   PetscFunctionReturn(0);
1179 }
1180 #undef __FUNCT__
1181 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1182 int MatPrintHelp_SeqBAIJ(Mat A)
1183 {
1184   static PetscTruth called = PETSC_FALSE;
1185   MPI_Comm          comm = A->comm;
1186   int               ierr;
1187 
1188   PetscFunctionBegin;
1189   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1190   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1191   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 EXTERN_C_BEGIN
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1198 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1199 {
1200   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1201   int         i,nz,nbs;
1202 
1203   PetscFunctionBegin;
1204   nz  = baij->maxnz/baij->bs2;
1205   nbs = baij->nbs;
1206   for (i=0; i<nz; i++) {
1207     baij->j[i] = indices[i];
1208   }
1209   baij->nz = nz;
1210   for (i=0; i<nbs; i++) {
1211     baij->ilen[i] = baij->imax[i];
1212   }
1213 
1214   PetscFunctionReturn(0);
1215 }
1216 EXTERN_C_END
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1220 /*@
1221     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1222        in the matrix.
1223 
1224   Input Parameters:
1225 +  mat - the SeqBAIJ matrix
1226 -  indices - the column indices
1227 
1228   Level: advanced
1229 
1230   Notes:
1231     This can be called if you have precomputed the nonzero structure of the
1232   matrix and want to provide it to the matrix object to improve the performance
1233   of the MatSetValues() operation.
1234 
1235     You MUST have set the correct numbers of nonzeros per row in the call to
1236   MatCreateSeqBAIJ().
1237 
1238     MUST be called before any calls to MatSetValues();
1239 
1240 @*/
1241 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1242 {
1243   int ierr,(*f)(Mat,int *);
1244 
1245   PetscFunctionBegin;
1246   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1247   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1248   if (f) {
1249     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1250   } else {
1251     SETERRQ(1,"Wrong type of matrix to set column indices");
1252   }
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 #undef __FUNCT__
1257 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1258 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1259 {
1260   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1261   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1262   PetscReal    atmp;
1263   PetscScalar  *x,zero = 0.0;
1264   MatScalar    *aa;
1265   int          ncols,brow,krow,kcol;
1266 
1267   PetscFunctionBegin;
1268   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1269   bs   = a->bs;
1270   aa   = a->a;
1271   ai   = a->i;
1272   aj   = a->j;
1273   mbs = a->mbs;
1274 
1275   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1276   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1277   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1278   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1279   for (i=0; i<mbs; i++) {
1280     ncols = ai[1] - ai[0]; ai++;
1281     brow  = bs*i;
1282     for (j=0; j<ncols; j++){
1283       /* bcol = bs*(*aj); */
1284       for (kcol=0; kcol<bs; kcol++){
1285         for (krow=0; krow<bs; krow++){
1286           atmp = PetscAbsScalar(*aa); aa++;
1287           row = brow + krow;    /* row index */
1288           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1289           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1290         }
1291       }
1292       aj++;
1293     }
1294   }
1295   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1296   PetscFunctionReturn(0);
1297 }
1298 
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1301 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1302 {
1303   int        ierr;
1304 
1305   PetscFunctionBegin;
1306   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1312 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array)
1313 {
1314   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1315   PetscFunctionBegin;
1316   *array = a->a;
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1322 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array)
1323 {
1324   PetscFunctionBegin;
1325   PetscFunctionReturn(0);
1326 }
1327 
1328 /* -------------------------------------------------------------------*/
1329 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1330        MatGetRow_SeqBAIJ,
1331        MatRestoreRow_SeqBAIJ,
1332        MatMult_SeqBAIJ_N,
1333        MatMultAdd_SeqBAIJ_N,
1334        MatMultTranspose_SeqBAIJ,
1335        MatMultTransposeAdd_SeqBAIJ,
1336        MatSolve_SeqBAIJ_N,
1337        0,
1338        0,
1339        0,
1340        MatLUFactor_SeqBAIJ,
1341        0,
1342        0,
1343        MatTranspose_SeqBAIJ,
1344        MatGetInfo_SeqBAIJ,
1345        MatEqual_SeqBAIJ,
1346        MatGetDiagonal_SeqBAIJ,
1347        MatDiagonalScale_SeqBAIJ,
1348        MatNorm_SeqBAIJ,
1349        0,
1350        MatAssemblyEnd_SeqBAIJ,
1351        0,
1352        MatSetOption_SeqBAIJ,
1353        MatZeroEntries_SeqBAIJ,
1354        MatZeroRows_SeqBAIJ,
1355        MatLUFactorSymbolic_SeqBAIJ,
1356        MatLUFactorNumeric_SeqBAIJ_N,
1357        0,
1358        0,
1359        MatSetUpPreallocation_SeqBAIJ,
1360        MatILUFactorSymbolic_SeqBAIJ,
1361        0,
1362        MatGetArray_SeqBAIJ,
1363        MatRestoreArray_SeqBAIJ,
1364        MatDuplicate_SeqBAIJ,
1365        0,
1366        0,
1367        MatILUFactor_SeqBAIJ,
1368        0,
1369        0,
1370        MatGetSubMatrices_SeqBAIJ,
1371        MatIncreaseOverlap_SeqBAIJ,
1372        MatGetValues_SeqBAIJ,
1373        0,
1374        MatPrintHelp_SeqBAIJ,
1375        MatScale_SeqBAIJ,
1376        0,
1377        0,
1378        0,
1379        MatGetBlockSize_SeqBAIJ,
1380        MatGetRowIJ_SeqBAIJ,
1381        MatRestoreRowIJ_SeqBAIJ,
1382        0,
1383        0,
1384        0,
1385        0,
1386        0,
1387        0,
1388        MatSetValuesBlocked_SeqBAIJ,
1389        MatGetSubMatrix_SeqBAIJ,
1390        MatDestroy_SeqBAIJ,
1391        MatView_SeqBAIJ,
1392        MatGetPetscMaps_Petsc,
1393        0,
1394        0,
1395        0,
1396        0,
1397        0,
1398        0,
1399        MatGetRowMax_SeqBAIJ,
1400        MatConvert_Basic};
1401 
1402 EXTERN_C_BEGIN
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1405 int MatStoreValues_SeqBAIJ(Mat mat)
1406 {
1407   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1408   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1409   int         ierr;
1410 
1411   PetscFunctionBegin;
1412   if (aij->nonew != 1) {
1413     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1414   }
1415 
1416   /* allocate space for values if not already there */
1417   if (!aij->saved_values) {
1418     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1419   }
1420 
1421   /* copy values over */
1422   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1423   PetscFunctionReturn(0);
1424 }
1425 EXTERN_C_END
1426 
1427 EXTERN_C_BEGIN
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1430 int MatRetrieveValues_SeqBAIJ(Mat mat)
1431 {
1432   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1433   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1434 
1435   PetscFunctionBegin;
1436   if (aij->nonew != 1) {
1437     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1438   }
1439   if (!aij->saved_values) {
1440     SETERRQ(1,"Must call MatStoreValues(A);first");
1441   }
1442 
1443   /* copy values over */
1444   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1445   PetscFunctionReturn(0);
1446 }
1447 EXTERN_C_END
1448 
1449 EXTERN_C_BEGIN
1450 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1451 EXTERN_C_END
1452 
1453 EXTERN_C_BEGIN
1454 #undef __FUNCT__
1455 #define __FUNCT__ "MatCreate_SeqBAIJ"
1456 int MatCreate_SeqBAIJ(Mat B)
1457 {
1458   int         ierr,size;
1459   Mat_SeqBAIJ *b;
1460 
1461   PetscFunctionBegin;
1462   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1463   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1464 
1465   B->m = B->M = PetscMax(B->m,B->M);
1466   B->n = B->N = PetscMax(B->n,B->N);
1467   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1468   B->data = (void*)b;
1469   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1470   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1471   B->factor           = 0;
1472   B->lupivotthreshold = 1.0;
1473   B->mapping          = 0;
1474   b->row              = 0;
1475   b->col              = 0;
1476   b->icol             = 0;
1477   b->reallocs         = 0;
1478   b->saved_values     = 0;
1479 #if defined(PETSC_USE_MAT_SINGLE)
1480   b->setvalueslen     = 0;
1481   b->setvaluescopy    = PETSC_NULL;
1482 #endif
1483   b->single_precision_solves = PETSC_FALSE;
1484 
1485   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1486   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1487 
1488   b->sorted           = PETSC_FALSE;
1489   b->roworiented      = PETSC_TRUE;
1490   b->nonew            = 0;
1491   b->diag             = 0;
1492   b->solve_work       = 0;
1493   b->mult_work        = 0;
1494   b->spptr            = 0;
1495   B->info.nz_unneeded = (PetscReal)b->maxnz;
1496   b->keepzeroedrows   = PETSC_FALSE;
1497 
1498   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1499                                      "MatStoreValues_SeqBAIJ",
1500                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1501   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1502                                      "MatRetrieveValues_SeqBAIJ",
1503                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1504   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1505                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1506                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1507   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1508                                      "MatConvert_SeqBAIJ_SeqAIJ",
1509                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1510   PetscFunctionReturn(0);
1511 }
1512 EXTERN_C_END
1513 
1514 #undef __FUNCT__
1515 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1516 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1517 {
1518   Mat         C;
1519   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1520   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1521 
1522   PetscFunctionBegin;
1523   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1524 
1525   *B = 0;
1526   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1527   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1528   c    = (Mat_SeqBAIJ*)C->data;
1529 
1530   c->bs         = a->bs;
1531   c->bs2        = a->bs2;
1532   c->mbs        = a->mbs;
1533   c->nbs        = a->nbs;
1534   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1535 
1536   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1537   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1538   for (i=0; i<mbs; i++) {
1539     c->imax[i] = a->imax[i];
1540     c->ilen[i] = a->ilen[i];
1541   }
1542 
1543   /* allocate the matrix space */
1544   c->singlemalloc = PETSC_TRUE;
1545   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1546   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1547   c->j = (int*)(c->a + nz*bs2);
1548   c->i = c->j + nz;
1549   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1550   if (mbs > 0) {
1551     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1552     if (cpvalues == MAT_COPY_VALUES) {
1553       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1554     } else {
1555       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1556     }
1557   }
1558 
1559   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1560   c->sorted      = a->sorted;
1561   c->roworiented = a->roworiented;
1562   c->nonew       = a->nonew;
1563 
1564   if (a->diag) {
1565     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1566     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1567     for (i=0; i<mbs; i++) {
1568       c->diag[i] = a->diag[i];
1569     }
1570   } else c->diag        = 0;
1571   c->nz                 = a->nz;
1572   c->maxnz              = a->maxnz;
1573   c->solve_work         = 0;
1574   c->spptr              = 0;     /* Dangerous -I'm throwing away a->spptr */
1575   c->mult_work          = 0;
1576   C->preallocated       = PETSC_TRUE;
1577   C->assembled          = PETSC_TRUE;
1578   *B = C;
1579   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1580   PetscFunctionReturn(0);
1581 }
1582 
1583 EXTERN_C_BEGIN
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatLoad_SeqBAIJ"
1586 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1587 {
1588   Mat_SeqBAIJ  *a;
1589   Mat          B;
1590   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1591   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1592   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1593   int          *masked,nmask,tmp,bs2,ishift;
1594   PetscScalar  *aa;
1595   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1596 
1597   PetscFunctionBegin;
1598   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1599   bs2  = bs*bs;
1600 
1601   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1602   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1603   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1604   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1605   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1606   M = header[1]; N = header[2]; nz = header[3];
1607 
1608   if (header[3] < 0) {
1609     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1610   }
1611 
1612   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1613 
1614   /*
1615      This code adds extra rows to make sure the number of rows is
1616     divisible by the blocksize
1617   */
1618   mbs        = M/bs;
1619   extra_rows = bs - M + bs*(mbs);
1620   if (extra_rows == bs) extra_rows = 0;
1621   else                  mbs++;
1622   if (extra_rows) {
1623     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1624   }
1625 
1626   /* read in row lengths */
1627   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1628   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1629   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1630 
1631   /* read in column indices */
1632   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1633   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1634   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1635 
1636   /* loop over row lengths determining block row lengths */
1637   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1638   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1639   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1640   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1641   masked   = mask + mbs;
1642   rowcount = 0; nzcount = 0;
1643   for (i=0; i<mbs; i++) {
1644     nmask = 0;
1645     for (j=0; j<bs; j++) {
1646       kmax = rowlengths[rowcount];
1647       for (k=0; k<kmax; k++) {
1648         tmp = jj[nzcount++]/bs;
1649         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1650       }
1651       rowcount++;
1652     }
1653     browlengths[i] += nmask;
1654     /* zero out the mask elements we set */
1655     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1656   }
1657 
1658   /* create our matrix */
1659   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1660   B = *A;
1661   a = (Mat_SeqBAIJ*)B->data;
1662 
1663   /* set matrix "i" values */
1664   a->i[0] = 0;
1665   for (i=1; i<= mbs; i++) {
1666     a->i[i]      = a->i[i-1] + browlengths[i-1];
1667     a->ilen[i-1] = browlengths[i-1];
1668   }
1669   a->nz         = 0;
1670   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1671 
1672   /* read in nonzero values */
1673   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1674   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1675   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1676 
1677   /* set "a" and "j" values into matrix */
1678   nzcount = 0; jcount = 0;
1679   for (i=0; i<mbs; i++) {
1680     nzcountb = nzcount;
1681     nmask    = 0;
1682     for (j=0; j<bs; j++) {
1683       kmax = rowlengths[i*bs+j];
1684       for (k=0; k<kmax; k++) {
1685         tmp = jj[nzcount++]/bs;
1686 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1687       }
1688     }
1689     /* sort the masked values */
1690     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1691 
1692     /* set "j" values into matrix */
1693     maskcount = 1;
1694     for (j=0; j<nmask; j++) {
1695       a->j[jcount++]  = masked[j];
1696       mask[masked[j]] = maskcount++;
1697     }
1698     /* set "a" values into matrix */
1699     ishift = bs2*a->i[i];
1700     for (j=0; j<bs; j++) {
1701       kmax = rowlengths[i*bs+j];
1702       for (k=0; k<kmax; k++) {
1703         tmp       = jj[nzcountb]/bs ;
1704         block     = mask[tmp] - 1;
1705         point     = jj[nzcountb] - bs*tmp;
1706         idx       = ishift + bs2*block + j + bs*point;
1707         a->a[idx] = (MatScalar)aa[nzcountb++];
1708       }
1709     }
1710     /* zero out the mask elements we set */
1711     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1712   }
1713   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1714 
1715   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1716   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1717   ierr = PetscFree(aa);CHKERRQ(ierr);
1718   ierr = PetscFree(jj);CHKERRQ(ierr);
1719   ierr = PetscFree(mask);CHKERRQ(ierr);
1720 
1721   B->assembled = PETSC_TRUE;
1722 
1723   ierr = MatView_Private(B);CHKERRQ(ierr);
1724   PetscFunctionReturn(0);
1725 }
1726 EXTERN_C_END
1727 
1728 #undef __FUNCT__
1729 #define __FUNCT__ "MatCreateSeqBAIJ"
1730 /*@C
1731    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1732    compressed row) format.  For good matrix assembly performance the
1733    user should preallocate the matrix storage by setting the parameter nz
1734    (or the array nnz).  By setting these parameters accurately, performance
1735    during matrix assembly can be increased by more than a factor of 50.
1736 
1737    Collective on MPI_Comm
1738 
1739    Input Parameters:
1740 +  comm - MPI communicator, set to PETSC_COMM_SELF
1741 .  bs - size of block
1742 .  m - number of rows
1743 .  n - number of columns
1744 .  nz - number of nonzero blocks  per block row (same for all rows)
1745 -  nnz - array containing the number of nonzero blocks in the various block rows
1746          (possibly different for each block row) or PETSC_NULL
1747 
1748    Output Parameter:
1749 .  A - the matrix
1750 
1751    Options Database Keys:
1752 .   -mat_no_unroll - uses code that does not unroll the loops in the
1753                      block calculations (much slower)
1754 .    -mat_block_size - size of the blocks to use
1755 
1756    Level: intermediate
1757 
1758    Notes:
1759    A nonzero block is any block that as 1 or more nonzeros in it
1760 
1761    The block AIJ format is fully compatible with standard Fortran 77
1762    storage.  That is, the stored row and column indices can begin at
1763    either one (as in Fortran) or zero.  See the users' manual for details.
1764 
1765    Specify the preallocated storage with either nz or nnz (not both).
1766    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1767    allocation.  For additional details, see the users manual chapter on
1768    matrices.
1769 
1770 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1771 @*/
1772 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1773 {
1774   int ierr;
1775 
1776   PetscFunctionBegin;
1777   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1778   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
1779   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1780   PetscFunctionReturn(0);
1781 }
1782 
1783 #undef __FUNCT__
1784 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
1785 /*@C
1786    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1787    per row in the matrix. For good matrix assembly performance the
1788    user should preallocate the matrix storage by setting the parameter nz
1789    (or the array nnz).  By setting these parameters accurately, performance
1790    during matrix assembly can be increased by more than a factor of 50.
1791 
1792    Collective on MPI_Comm
1793 
1794    Input Parameters:
1795 +  A - the matrix
1796 .  bs - size of block
1797 .  nz - number of block nonzeros per block row (same for all rows)
1798 -  nnz - array containing the number of block nonzeros in the various block rows
1799          (possibly different for each block row) or PETSC_NULL
1800 
1801    Options Database Keys:
1802 .   -mat_no_unroll - uses code that does not unroll the loops in the
1803                      block calculations (much slower)
1804 .    -mat_block_size - size of the blocks to use
1805 
1806    Level: intermediate
1807 
1808    Notes:
1809    The block AIJ format is fully compatible with standard Fortran 77
1810    storage.  That is, the stored row and column indices can begin at
1811    either one (as in Fortran) or zero.  See the users' manual for details.
1812 
1813    Specify the preallocated storage with either nz or nnz (not both).
1814    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1815    allocation.  For additional details, see the users manual chapter on
1816    matrices.
1817 
1818 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1819 @*/
1820 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1821 {
1822   Mat_SeqBAIJ *b;
1823   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1824   PetscTruth  flg;
1825 
1826   PetscFunctionBegin;
1827   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr);
1828   if (!flg) PetscFunctionReturn(0);
1829 
1830   B->preallocated = PETSC_TRUE;
1831   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1832   if (nnz && newbs != bs) {
1833     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1834   }
1835   bs   = newbs;
1836 
1837   mbs  = B->m/bs;
1838   nbs  = B->n/bs;
1839   bs2  = bs*bs;
1840 
1841   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1842     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1843   }
1844 
1845   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1846   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1847   if (nnz) {
1848     for (i=0; i<mbs; i++) {
1849       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1850       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1851     }
1852   }
1853 
1854   b       = (Mat_SeqBAIJ*)B->data;
1855   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1856   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1857   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1858   if (!flg) {
1859     switch (bs) {
1860     case 1:
1861       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1862       B->ops->mult            = MatMult_SeqBAIJ_1;
1863       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1864       break;
1865     case 2:
1866       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1867       B->ops->mult            = MatMult_SeqBAIJ_2;
1868       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1869       break;
1870     case 3:
1871       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1872       B->ops->mult            = MatMult_SeqBAIJ_3;
1873       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1874       break;
1875     case 4:
1876       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1877       B->ops->mult            = MatMult_SeqBAIJ_4;
1878       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1879       break;
1880     case 5:
1881       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1882       B->ops->mult            = MatMult_SeqBAIJ_5;
1883       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1884       break;
1885     case 6:
1886       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1887       B->ops->mult            = MatMult_SeqBAIJ_6;
1888       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1889       break;
1890     case 7:
1891       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1892       B->ops->mult            = MatMult_SeqBAIJ_7;
1893       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1894       break;
1895     default:
1896       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1897       B->ops->mult            = MatMult_SeqBAIJ_N;
1898       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1899       break;
1900     }
1901   }
1902   b->bs      = bs;
1903   b->mbs     = mbs;
1904   b->nbs     = nbs;
1905   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1906   if (!nnz) {
1907     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1908     else if (nz <= 0)        nz = 1;
1909     for (i=0; i<mbs; i++) b->imax[i] = nz;
1910     nz = nz*mbs;
1911   } else {
1912     nz = 0;
1913     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1914   }
1915 
1916   /* allocate the matrix space */
1917   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1918   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1919   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1920   b->j  = (int*)(b->a + nz*bs2);
1921   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1922   b->i  = b->j + nz;
1923   b->singlemalloc = PETSC_TRUE;
1924 
1925   b->i[0] = 0;
1926   for (i=1; i<mbs+1; i++) {
1927     b->i[i] = b->i[i-1] + b->imax[i-1];
1928   }
1929 
1930   /* b->ilen will count nonzeros in each block row so far. */
1931   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1932   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1933   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1934 
1935   b->bs               = bs;
1936   b->bs2              = bs2;
1937   b->mbs              = mbs;
1938   b->nz               = 0;
1939   b->maxnz            = nz*bs2;
1940   B->info.nz_unneeded = (PetscReal)b->maxnz;
1941   PetscFunctionReturn(0);
1942 }
1943 
1944