xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 7dad321e0144cbfaee16ffe4c4eed59fdeb38d69)
1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "petscsys.h"                     /*I "petscmat.h" I*/
11 
12 /*  UGLY, ugly, ugly
13    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
14    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
15    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
16    converts the entries into single precision and then calls ..._MatScalar() to put them
17    into the single precision data structures.
18 */
19 #if defined(PETSC_USE_MAT_SINGLE)
20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
21 #else
22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
23 #endif
24 
25 #define CHUNKSIZE  10
26 
27 /*
28      Checks for missing diagonals
29 */
30 #undef __FUNCT__
31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
32 int MatMissingDiagonal_SeqBAIJ(Mat A)
33 {
34   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
35   int         *diag,*jj = a->j,i,ierr;
36 
37   PetscFunctionBegin;
38   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
39   diag = a->diag;
40   for (i=0; i<a->mbs; i++) {
41     if (jj[diag[i]] != i) {
42       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
43     }
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
50 int MatMarkDiagonal_SeqBAIJ(Mat A)
51 {
52   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
53   int         i,j,*diag,m = a->mbs,ierr;
54 
55   PetscFunctionBegin;
56   if (a->diag) PetscFunctionReturn(0);
57 
58   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
59   PetscLogObjectMemory(A,(m+1)*sizeof(int));
60   for (i=0; i<m; i++) {
61     diag[i] = a->i[i+1];
62     for (j=a->i[i]; j<a->i[i+1]; j++) {
63       if (a->j[j] == i) {
64         diag[i] = j;
65         break;
66       }
67     }
68   }
69   a->diag = diag;
70   PetscFunctionReturn(0);
71 }
72 
73 
74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
79 {
80   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
81   int         ierr,n = a->mbs,i;
82 
83   PetscFunctionBegin;
84   *nn = n;
85   if (!ia) PetscFunctionReturn(0);
86   if (symmetric) {
87     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
88   } else if (oshift == 1) {
89     /* temporarily add 1 to i and j indices */
90     int nz = a->i[n];
91     for (i=0; i<nz; i++) a->j[i]++;
92     for (i=0; i<n+1; i++) a->i[i]++;
93     *ia = a->i; *ja = a->j;
94   } else {
95     *ia = a->i; *ja = a->j;
96   }
97 
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
104 {
105   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
106   int         i,n = a->mbs,ierr;
107 
108   PetscFunctionBegin;
109   if (!ia) PetscFunctionReturn(0);
110   if (symmetric) {
111     ierr = PetscFree(*ia);CHKERRQ(ierr);
112     ierr = PetscFree(*ja);CHKERRQ(ierr);
113   } else if (oshift == 1) {
114     int nz = a->i[n]-1;
115     for (i=0; i<nz; i++) a->j[i]--;
116     for (i=0; i<n+1; i++) a->i[i]--;
117   }
118   PetscFunctionReturn(0);
119 }
120 
121 #undef __FUNCT__
122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
124 {
125   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
126 
127   PetscFunctionBegin;
128   *bs = baij->bs;
129   PetscFunctionReturn(0);
130 }
131 
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatDestroy_SeqBAIJ"
135 int MatDestroy_SeqBAIJ(Mat A)
136 {
137   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
138   int         ierr;
139 
140   PetscFunctionBegin;
141 #if defined(PETSC_USE_LOG)
142   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
143 #endif
144   ierr = PetscFree(a->a);CHKERRQ(ierr);
145   if (!a->singlemalloc) {
146     ierr = PetscFree(a->i);CHKERRQ(ierr);
147     ierr = PetscFree(a->j);CHKERRQ(ierr);
148   }
149   if (a->row) {
150     ierr = ISDestroy(a->row);CHKERRQ(ierr);
151   }
152   if (a->col) {
153     ierr = ISDestroy(a->col);CHKERRQ(ierr);
154   }
155   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
156   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
157   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
158   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
159   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
160   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
161   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
162 #if defined(PETSC_USE_MAT_SINGLE)
163   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
164 #endif
165   ierr = PetscFree(a);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatSetOption_SeqBAIJ"
171 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
172 {
173   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
174 
175   PetscFunctionBegin;
176   switch (op) {
177   case MAT_ROW_ORIENTED:
178     a->roworiented    = PETSC_TRUE;
179     break;
180   case MAT_COLUMN_ORIENTED:
181     a->roworiented    = PETSC_FALSE;
182     break;
183   case MAT_COLUMNS_SORTED:
184     a->sorted         = PETSC_TRUE;
185     break;
186   case MAT_COLUMNS_UNSORTED:
187     a->sorted         = PETSC_FALSE;
188     break;
189   case MAT_KEEP_ZEROED_ROWS:
190     a->keepzeroedrows = PETSC_TRUE;
191     break;
192   case MAT_NO_NEW_NONZERO_LOCATIONS:
193     a->nonew          = 1;
194     break;
195   case MAT_NEW_NONZERO_LOCATION_ERR:
196     a->nonew          = -1;
197     break;
198   case MAT_NEW_NONZERO_ALLOCATION_ERR:
199     a->nonew          = -2;
200     break;
201   case MAT_YES_NEW_NONZERO_LOCATIONS:
202     a->nonew          = 0;
203     break;
204   case MAT_ROWS_SORTED:
205   case MAT_ROWS_UNSORTED:
206   case MAT_YES_NEW_DIAGONALS:
207   case MAT_IGNORE_OFF_PROC_ENTRIES:
208   case MAT_USE_HASH_TABLE:
209     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
210     break;
211   case MAT_NO_NEW_DIAGONALS:
212     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
213   case MAT_USE_SINGLE_PRECISION_SOLVES:
214     if (a->bs==4) {
215       PetscTruth sse_enabled_local,sse_enabled_global;
216       int        ierr;
217 
218       sse_enabled_local  = PETSC_FALSE;
219       sse_enabled_global = PETSC_FALSE;
220 
221       ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr);
222 #if defined(PETSC_HAVE_SSE)
223       if (sse_enabled_local) {
224           a->single_precision_solves = PETSC_TRUE;
225           A->ops->solve              = MatSolve_SeqBAIJ_Update;
226           A->ops->solvetranspose     = MatSolveTranspose_SeqBAIJ_Update;
227           PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n");
228           break;
229       } else {
230         PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
231       }
232 #else
233       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
234 #endif
235     }
236     break;
237   default:
238     SETERRQ(PETSC_ERR_SUP,"unknown option");
239   }
240   PetscFunctionReturn(0);
241 }
242 
243 #undef __FUNCT__
244 #define __FUNCT__ "MatGetRow_SeqBAIJ"
245 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
246 {
247   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
248   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
249   MatScalar    *aa,*aa_i;
250   PetscScalar  *v_i;
251 
252   PetscFunctionBegin;
253   bs  = a->bs;
254   ai  = a->i;
255   aj  = a->j;
256   aa  = a->a;
257   bs2 = a->bs2;
258 
259   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
260 
261   bn  = row/bs;   /* Block number */
262   bp  = row % bs; /* Block Position */
263   M   = ai[bn+1] - ai[bn];
264   *nz = bs*M;
265 
266   if (v) {
267     *v = 0;
268     if (*nz) {
269       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
270       for (i=0; i<M; i++) { /* for each block in the block row */
271         v_i  = *v + i*bs;
272         aa_i = aa + bs2*(ai[bn] + i);
273         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
274       }
275     }
276   }
277 
278   if (idx) {
279     *idx = 0;
280     if (*nz) {
281       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
282       for (i=0; i<M; i++) { /* for each block in the block row */
283         idx_i = *idx + i*bs;
284         itmp  = bs*aj[ai[bn] + i];
285         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
286       }
287     }
288   }
289   PetscFunctionReturn(0);
290 }
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
294 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
295 {
296   int ierr;
297 
298   PetscFunctionBegin;
299   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
300   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
301   PetscFunctionReturn(0);
302 }
303 
304 #undef __FUNCT__
305 #define __FUNCT__ "MatTranspose_SeqBAIJ"
306 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
307 {
308   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
309   Mat         C;
310   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
311   int         *rows,*cols,bs2=a->bs2;
312   PetscScalar *array;
313 
314   PetscFunctionBegin;
315   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
316   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
317   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
318 
319 #if defined(PETSC_USE_MAT_SINGLE)
320   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
321   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
322 #else
323   array = a->a;
324 #endif
325 
326   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
327   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
328   ierr = PetscFree(col);CHKERRQ(ierr);
329   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
330   cols = rows + bs;
331   for (i=0; i<mbs; i++) {
332     cols[0] = i*bs;
333     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
334     len = ai[i+1] - ai[i];
335     for (j=0; j<len; j++) {
336       rows[0] = (*aj++)*bs;
337       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
338       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
339       array += bs2;
340     }
341   }
342   ierr = PetscFree(rows);CHKERRQ(ierr);
343 #if defined(PETSC_USE_MAT_SINGLE)
344   ierr = PetscFree(array);
345 #endif
346 
347   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
349 
350   if (B) {
351     *B = C;
352   } else {
353     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
354   }
355   PetscFunctionReturn(0);
356 }
357 
358 #undef __FUNCT__
359 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
360 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
361 {
362   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
363   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
364   PetscScalar *aa;
365   FILE        *file;
366 
367   PetscFunctionBegin;
368   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
369   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
370   col_lens[0] = MAT_FILE_COOKIE;
371 
372   col_lens[1] = A->m;
373   col_lens[2] = A->n;
374   col_lens[3] = a->nz*bs2;
375 
376   /* store lengths of each row and write (including header) to file */
377   count = 0;
378   for (i=0; i<a->mbs; i++) {
379     for (j=0; j<bs; j++) {
380       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
381     }
382   }
383   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
384   ierr = PetscFree(col_lens);CHKERRQ(ierr);
385 
386   /* store column indices (zero start index) */
387   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
388   count = 0;
389   for (i=0; i<a->mbs; i++) {
390     for (j=0; j<bs; j++) {
391       for (k=a->i[i]; k<a->i[i+1]; k++) {
392         for (l=0; l<bs; l++) {
393           jj[count++] = bs*a->j[k] + l;
394         }
395       }
396     }
397   }
398   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
399   ierr = PetscFree(jj);CHKERRQ(ierr);
400 
401   /* store nonzero values */
402   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
403   count = 0;
404   for (i=0; i<a->mbs; i++) {
405     for (j=0; j<bs; j++) {
406       for (k=a->i[i]; k<a->i[i+1]; k++) {
407         for (l=0; l<bs; l++) {
408           aa[count++] = a->a[bs2*k + l*bs + j];
409         }
410       }
411     }
412   }
413   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
414   ierr = PetscFree(aa);CHKERRQ(ierr);
415 
416   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
417   if (file) {
418     fprintf(file,"-matload_block_size %d\n",a->bs);
419   }
420   PetscFunctionReturn(0);
421 }
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
425 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
426 {
427   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
428   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
429   PetscViewerFormat format;
430 
431   PetscFunctionBegin;
432   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
433   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
434     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
435   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
436     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
437   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
438     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
439     for (i=0; i<a->mbs; i++) {
440       for (j=0; j<bs; j++) {
441         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
442         for (k=a->i[i]; k<a->i[i+1]; k++) {
443           for (l=0; l<bs; l++) {
444 #if defined(PETSC_USE_COMPLEX)
445             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
446               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
447                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
448             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
449               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
450                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
451             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
452               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
453             }
454 #else
455             if (a->a[bs2*k + l*bs + j] != 0.0) {
456               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
457             }
458 #endif
459           }
460         }
461         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
462       }
463     }
464     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
465   } else {
466     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
467     for (i=0; i<a->mbs; i++) {
468       for (j=0; j<bs; j++) {
469         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
470         for (k=a->i[i]; k<a->i[i+1]; k++) {
471           for (l=0; l<bs; l++) {
472 #if defined(PETSC_USE_COMPLEX)
473             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
474               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
475                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
476             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
477               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
478                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
479             } else {
480               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
481             }
482 #else
483             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
484 #endif
485           }
486         }
487         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
488       }
489     }
490     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
491   }
492   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
498 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
499 {
500   Mat          A = (Mat) Aa;
501   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
502   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
503   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
504   MatScalar    *aa;
505   PetscViewer  viewer;
506 
507   PetscFunctionBegin;
508 
509   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
510   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
511 
512   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
513 
514   /* loop over matrix elements drawing boxes */
515   color = PETSC_DRAW_BLUE;
516   for (i=0,row=0; i<mbs; i++,row+=bs) {
517     for (j=a->i[i]; j<a->i[i+1]; j++) {
518       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
519       x_l = a->j[j]*bs; x_r = x_l + 1.0;
520       aa = a->a + j*bs2;
521       for (k=0; k<bs; k++) {
522         for (l=0; l<bs; l++) {
523           if (PetscRealPart(*aa++) >=  0.) continue;
524           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
525         }
526       }
527     }
528   }
529   color = PETSC_DRAW_CYAN;
530   for (i=0,row=0; i<mbs; i++,row+=bs) {
531     for (j=a->i[i]; j<a->i[i+1]; j++) {
532       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
533       x_l = a->j[j]*bs; x_r = x_l + 1.0;
534       aa = a->a + j*bs2;
535       for (k=0; k<bs; k++) {
536         for (l=0; l<bs; l++) {
537           if (PetscRealPart(*aa++) != 0.) continue;
538           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
539         }
540       }
541     }
542   }
543 
544   color = PETSC_DRAW_RED;
545   for (i=0,row=0; i<mbs; i++,row+=bs) {
546     for (j=a->i[i]; j<a->i[i+1]; j++) {
547       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
548       x_l = a->j[j]*bs; x_r = x_l + 1.0;
549       aa = a->a + j*bs2;
550       for (k=0; k<bs; k++) {
551         for (l=0; l<bs; l++) {
552           if (PetscRealPart(*aa++) <= 0.) continue;
553           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
554         }
555       }
556     }
557   }
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
563 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
564 {
565   int          ierr;
566   PetscReal    xl,yl,xr,yr,w,h;
567   PetscDraw    draw;
568   PetscTruth   isnull;
569 
570   PetscFunctionBegin;
571 
572   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
573   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
574 
575   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
576   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
577   xr += w;    yr += h;  xl = -w;     yl = -h;
578   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
579   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
580   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
581   PetscFunctionReturn(0);
582 }
583 
584 #undef __FUNCT__
585 #define __FUNCT__ "MatView_SeqBAIJ"
586 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
587 {
588   int        ierr;
589   PetscTruth issocket,isascii,isbinary,isdraw;
590 
591   PetscFunctionBegin;
592   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
593   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
594   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
595   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
596   if (issocket) {
597     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
598   } else if (isascii){
599     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
600   } else if (isbinary) {
601     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
602   } else if (isdraw) {
603     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
604   } else {
605     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
606   }
607   PetscFunctionReturn(0);
608 }
609 
610 
611 #undef __FUNCT__
612 #define __FUNCT__ "MatGetValues_SeqBAIJ"
613 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
614 {
615   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
616   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
617   int        *ai = a->i,*ailen = a->ilen;
618   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
619   MatScalar  *ap,*aa = a->a,zero = 0.0;
620 
621   PetscFunctionBegin;
622   for (k=0; k<m; k++) { /* loop over rows */
623     row  = im[k]; brow = row/bs;
624     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
625     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
626     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
627     nrow = ailen[brow];
628     for (l=0; l<n; l++) { /* loop over columns */
629       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
630       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
631       col  = in[l] ;
632       bcol = col/bs;
633       cidx = col%bs;
634       ridx = row%bs;
635       high = nrow;
636       low  = 0; /* assume unsorted */
637       while (high-low > 5) {
638         t = (low+high)/2;
639         if (rp[t] > bcol) high = t;
640         else             low  = t;
641       }
642       for (i=low; i<high; i++) {
643         if (rp[i] > bcol) break;
644         if (rp[i] == bcol) {
645           *v++ = ap[bs2*i+bs*cidx+ridx];
646           goto finished;
647         }
648       }
649       *v++ = zero;
650       finished:;
651     }
652   }
653   PetscFunctionReturn(0);
654 }
655 
656 #if defined(PETSC_USE_MAT_SINGLE)
657 #undef __FUNCT__
658 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
659 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
660 {
661   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
662   int         ierr,i,N = m*n*b->bs2;
663   MatScalar   *vsingle;
664 
665   PetscFunctionBegin;
666   if (N > b->setvalueslen) {
667     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
668     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
669     b->setvalueslen  = N;
670   }
671   vsingle = b->setvaluescopy;
672   for (i=0; i<N; i++) {
673     vsingle[i] = v[i];
674   }
675   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
676   PetscFunctionReturn(0);
677 }
678 #endif
679 
680 
681 #undef __FUNCT__
682 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
683 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
684 {
685   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
686   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
687   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
688   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
689   PetscTruth  roworiented=a->roworiented;
690   MatScalar   *value = v,*ap,*aa = a->a,*bap;
691 
692   PetscFunctionBegin;
693   if (roworiented) {
694     stepval = (n-1)*bs;
695   } else {
696     stepval = (m-1)*bs;
697   }
698   for (k=0; k<m; k++) { /* loop over added rows */
699     row  = im[k];
700     if (row < 0) continue;
701 #if defined(PETSC_USE_BOPT_g)
702     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
703 #endif
704     rp   = aj + ai[row];
705     ap   = aa + bs2*ai[row];
706     rmax = imax[row];
707     nrow = ailen[row];
708     low  = 0;
709     for (l=0; l<n; l++) { /* loop over added columns */
710       if (in[l] < 0) continue;
711 #if defined(PETSC_USE_BOPT_g)
712       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
713 #endif
714       col = in[l];
715       if (roworiented) {
716         value = v + k*(stepval+bs)*bs + l*bs;
717       } else {
718         value = v + l*(stepval+bs)*bs + k*bs;
719       }
720       if (!sorted) low = 0; high = nrow;
721       while (high-low > 7) {
722         t = (low+high)/2;
723         if (rp[t] > col) high = t;
724         else             low  = t;
725       }
726       for (i=low; i<high; i++) {
727         if (rp[i] > col) break;
728         if (rp[i] == col) {
729           bap  = ap +  bs2*i;
730           if (roworiented) {
731             if (is == ADD_VALUES) {
732               for (ii=0; ii<bs; ii++,value+=stepval) {
733                 for (jj=ii; jj<bs2; jj+=bs) {
734                   bap[jj] += *value++;
735                 }
736               }
737             } else {
738               for (ii=0; ii<bs; ii++,value+=stepval) {
739                 for (jj=ii; jj<bs2; jj+=bs) {
740                   bap[jj] = *value++;
741                 }
742               }
743             }
744           } else {
745             if (is == ADD_VALUES) {
746               for (ii=0; ii<bs; ii++,value+=stepval) {
747                 for (jj=0; jj<bs; jj++) {
748                   *bap++ += *value++;
749                 }
750               }
751             } else {
752               for (ii=0; ii<bs; ii++,value+=stepval) {
753                 for (jj=0; jj<bs; jj++) {
754                   *bap++  = *value++;
755                 }
756               }
757             }
758           }
759           goto noinsert2;
760         }
761       }
762       if (nonew == 1) goto noinsert2;
763       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
764       if (nrow >= rmax) {
765         /* there is no extra room in row, therefore enlarge */
766         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
767         MatScalar *new_a;
768 
769         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
770 
771         /* malloc new storage space */
772         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
773 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
774         new_j   = (int*)(new_a + bs2*new_nz);
775         new_i   = new_j + new_nz;
776 
777         /* copy over old data into new slots */
778         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
779         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
780         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
781         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
782         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
783         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
784         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
785         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
786         /* free up old matrix storage */
787         ierr = PetscFree(a->a);CHKERRQ(ierr);
788         if (!a->singlemalloc) {
789           ierr = PetscFree(a->i);CHKERRQ(ierr);
790           ierr = PetscFree(a->j);CHKERRQ(ierr);
791         }
792         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
793         a->singlemalloc = PETSC_TRUE;
794 
795         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
796         rmax = imax[row] = imax[row] + CHUNKSIZE;
797         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
798         a->maxnz += bs2*CHUNKSIZE;
799         a->reallocs++;
800         a->nz++;
801       }
802       N = nrow++ - 1;
803       /* shift up all the later entries in this row */
804       for (ii=N; ii>=i; ii--) {
805         rp[ii+1] = rp[ii];
806         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
807       }
808       if (N >= i) {
809         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
810       }
811       rp[i] = col;
812       bap   = ap +  bs2*i;
813       if (roworiented) {
814         for (ii=0; ii<bs; ii++,value+=stepval) {
815           for (jj=ii; jj<bs2; jj+=bs) {
816             bap[jj] = *value++;
817           }
818         }
819       } else {
820         for (ii=0; ii<bs; ii++,value+=stepval) {
821           for (jj=0; jj<bs; jj++) {
822             *bap++  = *value++;
823           }
824         }
825       }
826       noinsert2:;
827       low = i;
828     }
829     ailen[row] = nrow;
830   }
831   PetscFunctionReturn(0);
832 }
833 
834 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);
835 #undef __FUNCT__
836 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
837 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
838 {
839   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
840   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
841   int        m = A->m,*ip,N,*ailen = a->ilen;
842   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
843   MatScalar  *aa = a->a,*ap;
844 #if defined(PETSC_HAVE_DSCPACK)
845   PetscTruth   flag;
846 #endif
847 
848   PetscFunctionBegin;
849   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
850 
851   if (m) rmax = ailen[0];
852   for (i=1; i<mbs; i++) {
853     /* move each row back by the amount of empty slots (fshift) before it*/
854     fshift += imax[i-1] - ailen[i-1];
855     rmax   = PetscMax(rmax,ailen[i]);
856     if (fshift) {
857       ip = aj + ai[i]; ap = aa + bs2*ai[i];
858       N = ailen[i];
859       for (j=0; j<N; j++) {
860         ip[j-fshift] = ip[j];
861         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
862       }
863     }
864     ai[i] = ai[i-1] + ailen[i-1];
865   }
866   if (mbs) {
867     fshift += imax[mbs-1] - ailen[mbs-1];
868     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
869   }
870   /* reset ilen and imax for each row */
871   for (i=0; i<mbs; i++) {
872     ailen[i] = imax[i] = ai[i+1] - ai[i];
873   }
874   a->nz = ai[mbs];
875 
876   /* diagonals may have moved, so kill the diagonal pointers */
877   if (fshift && a->diag) {
878     ierr = PetscFree(a->diag);CHKERRQ(ierr);
879     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
880     a->diag = 0;
881   }
882   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
883   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
884   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
885   a->reallocs          = 0;
886   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
887 #if defined(PETSC_HAVE_DSCPACK)
888   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr);
889   if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); }
890 #endif
891 
892 
893   PetscFunctionReturn(0);
894 }
895 
896 
897 
898 /*
899    This function returns an array of flags which indicate the locations of contiguous
900    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
901    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
902    Assume: sizes should be long enough to hold all the values.
903 */
904 #undef __FUNCT__
905 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
906 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
907 {
908   int        i,j,k,row;
909   PetscTruth flg;
910 
911   PetscFunctionBegin;
912   for (i=0,j=0; i<n; j++) {
913     row = idx[i];
914     if (row%bs!=0) { /* Not the begining of a block */
915       sizes[j] = 1;
916       i++;
917     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
918       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
919       i++;
920     } else { /* Begining of the block, so check if the complete block exists */
921       flg = PETSC_TRUE;
922       for (k=1; k<bs; k++) {
923         if (row+k != idx[i+k]) { /* break in the block */
924           flg = PETSC_FALSE;
925           break;
926         }
927       }
928       if (flg == PETSC_TRUE) { /* No break in the bs */
929         sizes[j] = bs;
930         i+= bs;
931       } else {
932         sizes[j] = 1;
933         i++;
934       }
935     }
936   }
937   *bs_max = j;
938   PetscFunctionReturn(0);
939 }
940 
941 #undef __FUNCT__
942 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
943 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag)
944 {
945   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
946   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
947   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
948   PetscScalar zero = 0.0;
949   MatScalar   *aa;
950 
951   PetscFunctionBegin;
952   /* Make a copy of the IS and  sort it */
953   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
954   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
955 
956   /* allocate memory for rows,sizes */
957   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
958   sizes = rows + is_n;
959 
960   /* copy IS values to rows, and sort them */
961   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
962   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
963   if (baij->keepzeroedrows) {
964     for (i=0; i<is_n; i++) { sizes[i] = 1; }
965     bs_max = is_n;
966   } else {
967     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
968   }
969   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
970 
971   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
972     row   = rows[j];
973     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
974     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
975     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
976     if (sizes[i] == bs && !baij->keepzeroedrows) {
977       if (diag) {
978         if (baij->ilen[row/bs] > 0) {
979           baij->ilen[row/bs]       = 1;
980           baij->j[baij->i[row/bs]] = row/bs;
981           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
982         }
983         /* Now insert all the diagonal values for this bs */
984         for (k=0; k<bs; k++) {
985           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
986         }
987       } else { /* (!diag) */
988         baij->ilen[row/bs] = 0;
989       } /* end (!diag) */
990     } else { /* (sizes[i] != bs) */
991 #if defined (PETSC_USE_DEBUG)
992       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
993 #endif
994       for (k=0; k<count; k++) {
995         aa[0] =  zero;
996         aa    += bs;
997       }
998       if (diag) {
999         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1000       }
1001     }
1002   }
1003 
1004   ierr = PetscFree(rows);CHKERRQ(ierr);
1005   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 #undef __FUNCT__
1010 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1011 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
1012 {
1013   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1014   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1015   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1016   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1017   int         ridx,cidx,bs2=a->bs2,ierr;
1018   PetscTruth  roworiented=a->roworiented;
1019   MatScalar   *ap,value,*aa=a->a,*bap;
1020 
1021   PetscFunctionBegin;
1022   for (k=0; k<m; k++) { /* loop over added rows */
1023     row  = im[k]; brow = row/bs;
1024     if (row < 0) continue;
1025 #if defined(PETSC_USE_BOPT_g)
1026     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1027 #endif
1028     rp   = aj + ai[brow];
1029     ap   = aa + bs2*ai[brow];
1030     rmax = imax[brow];
1031     nrow = ailen[brow];
1032     low  = 0;
1033     for (l=0; l<n; l++) { /* loop over added columns */
1034       if (in[l] < 0) continue;
1035 #if defined(PETSC_USE_BOPT_g)
1036       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1037 #endif
1038       col = in[l]; bcol = col/bs;
1039       ridx = row % bs; cidx = col % bs;
1040       if (roworiented) {
1041         value = v[l + k*n];
1042       } else {
1043         value = v[k + l*m];
1044       }
1045       if (!sorted) low = 0; high = nrow;
1046       while (high-low > 7) {
1047         t = (low+high)/2;
1048         if (rp[t] > bcol) high = t;
1049         else              low  = t;
1050       }
1051       for (i=low; i<high; i++) {
1052         if (rp[i] > bcol) break;
1053         if (rp[i] == bcol) {
1054           bap  = ap +  bs2*i + bs*cidx + ridx;
1055           if (is == ADD_VALUES) *bap += value;
1056           else                  *bap  = value;
1057           goto noinsert1;
1058         }
1059       }
1060       if (nonew == 1) goto noinsert1;
1061       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1062       if (nrow >= rmax) {
1063         /* there is no extra room in row, therefore enlarge */
1064         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1065         MatScalar *new_a;
1066 
1067         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1068 
1069         /* Malloc new storage space */
1070         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1071 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1072         new_j   = (int*)(new_a + bs2*new_nz);
1073         new_i   = new_j + new_nz;
1074 
1075         /* copy over old data into new slots */
1076         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1077         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1078         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1079         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1080         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1081         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1082         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1083         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1084         /* free up old matrix storage */
1085         ierr = PetscFree(a->a);CHKERRQ(ierr);
1086         if (!a->singlemalloc) {
1087           ierr = PetscFree(a->i);CHKERRQ(ierr);
1088           ierr = PetscFree(a->j);CHKERRQ(ierr);
1089         }
1090         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1091         a->singlemalloc = PETSC_TRUE;
1092 
1093         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1094         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1095         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1096         a->maxnz += bs2*CHUNKSIZE;
1097         a->reallocs++;
1098         a->nz++;
1099       }
1100       N = nrow++ - 1;
1101       /* shift up all the later entries in this row */
1102       for (ii=N; ii>=i; ii--) {
1103         rp[ii+1] = rp[ii];
1104         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1105       }
1106       if (N>=i) {
1107         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1108       }
1109       rp[i]                      = bcol;
1110       ap[bs2*i + bs*cidx + ridx] = value;
1111       noinsert1:;
1112       low = i;
1113     }
1114     ailen[brow] = nrow;
1115   }
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 
1120 #undef __FUNCT__
1121 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1122 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1123 {
1124   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1125   Mat         outA;
1126   int         ierr;
1127   PetscTruth  row_identity,col_identity;
1128 
1129   PetscFunctionBegin;
1130   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1131   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1132   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1133   if (!row_identity || !col_identity) {
1134     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1135   }
1136 
1137   outA          = inA;
1138   inA->factor   = FACTOR_LU;
1139 
1140   if (!a->diag) {
1141     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1142   }
1143 
1144   a->row        = row;
1145   a->col        = col;
1146   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1147   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1148 
1149   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1150   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1151   PetscLogObjectParent(inA,a->icol);
1152 
1153   /*
1154       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1155       for ILU(0) factorization with natural ordering
1156   */
1157   if (a->bs < 8) {
1158     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1159   } else {
1160     if (!a->solve_work) {
1161       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1162       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1163     }
1164   }
1165 
1166   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1167 
1168   PetscFunctionReturn(0);
1169 }
1170 #undef __FUNCT__
1171 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1172 int MatPrintHelp_SeqBAIJ(Mat A)
1173 {
1174   static PetscTruth called = PETSC_FALSE;
1175   MPI_Comm          comm = A->comm;
1176   int               ierr;
1177 
1178   PetscFunctionBegin;
1179   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1180   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1181   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 EXTERN_C_BEGIN
1186 #undef __FUNCT__
1187 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1188 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1189 {
1190   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1191   int         i,nz,nbs;
1192 
1193   PetscFunctionBegin;
1194   nz  = baij->maxnz/baij->bs2;
1195   nbs = baij->nbs;
1196   for (i=0; i<nz; i++) {
1197     baij->j[i] = indices[i];
1198   }
1199   baij->nz = nz;
1200   for (i=0; i<nbs; i++) {
1201     baij->ilen[i] = baij->imax[i];
1202   }
1203 
1204   PetscFunctionReturn(0);
1205 }
1206 EXTERN_C_END
1207 
1208 #undef __FUNCT__
1209 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1210 /*@
1211     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1212        in the matrix.
1213 
1214   Input Parameters:
1215 +  mat - the SeqBAIJ matrix
1216 -  indices - the column indices
1217 
1218   Level: advanced
1219 
1220   Notes:
1221     This can be called if you have precomputed the nonzero structure of the
1222   matrix and want to provide it to the matrix object to improve the performance
1223   of the MatSetValues() operation.
1224 
1225     You MUST have set the correct numbers of nonzeros per row in the call to
1226   MatCreateSeqBAIJ().
1227 
1228     MUST be called before any calls to MatSetValues();
1229 
1230 @*/
1231 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1232 {
1233   int ierr,(*f)(Mat,int *);
1234 
1235   PetscFunctionBegin;
1236   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1237   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1238   if (f) {
1239     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1240   } else {
1241     SETERRQ(1,"Wrong type of matrix to set column indices");
1242   }
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNCT__
1247 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1248 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1249 {
1250   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1251   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1252   PetscReal    atmp;
1253   PetscScalar  *x,zero = 0.0;
1254   MatScalar    *aa;
1255   int          ncols,brow,krow,kcol;
1256 
1257   PetscFunctionBegin;
1258   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1259   bs   = a->bs;
1260   aa   = a->a;
1261   ai   = a->i;
1262   aj   = a->j;
1263   mbs = a->mbs;
1264 
1265   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1266   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1267   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1268   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1269   for (i=0; i<mbs; i++) {
1270     ncols = ai[1] - ai[0]; ai++;
1271     brow  = bs*i;
1272     for (j=0; j<ncols; j++){
1273       /* bcol = bs*(*aj); */
1274       for (kcol=0; kcol<bs; kcol++){
1275         for (krow=0; krow<bs; krow++){
1276           atmp = PetscAbsScalar(*aa); aa++;
1277           row = brow + krow;    /* row index */
1278           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1279           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1280         }
1281       }
1282       aj++;
1283     }
1284   }
1285   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 #undef __FUNCT__
1290 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1291 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1292 {
1293   int        ierr;
1294 
1295   PetscFunctionBegin;
1296   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1297   PetscFunctionReturn(0);
1298 }
1299 
1300 #undef __FUNCT__
1301 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1302 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array)
1303 {
1304   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1305   PetscFunctionBegin;
1306   *array = a->a;
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1312 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array)
1313 {
1314   PetscFunctionBegin;
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 /* -------------------------------------------------------------------*/
1319 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1320        MatGetRow_SeqBAIJ,
1321        MatRestoreRow_SeqBAIJ,
1322        MatMult_SeqBAIJ_N,
1323        MatMultAdd_SeqBAIJ_N,
1324        MatMultTranspose_SeqBAIJ,
1325        MatMultTransposeAdd_SeqBAIJ,
1326        MatSolve_SeqBAIJ_N,
1327        0,
1328        0,
1329        0,
1330        MatLUFactor_SeqBAIJ,
1331        0,
1332        0,
1333        MatTranspose_SeqBAIJ,
1334        MatGetInfo_SeqBAIJ,
1335        MatEqual_SeqBAIJ,
1336        MatGetDiagonal_SeqBAIJ,
1337        MatDiagonalScale_SeqBAIJ,
1338        MatNorm_SeqBAIJ,
1339        0,
1340        MatAssemblyEnd_SeqBAIJ,
1341        0,
1342        MatSetOption_SeqBAIJ,
1343        MatZeroEntries_SeqBAIJ,
1344        MatZeroRows_SeqBAIJ,
1345        MatLUFactorSymbolic_SeqBAIJ,
1346        MatLUFactorNumeric_SeqBAIJ_N,
1347        0,
1348        0,
1349        MatSetUpPreallocation_SeqBAIJ,
1350        MatILUFactorSymbolic_SeqBAIJ,
1351        0,
1352        MatGetArray_SeqBAIJ,
1353        MatRestoreArray_SeqBAIJ,
1354        MatDuplicate_SeqBAIJ,
1355        0,
1356        0,
1357        MatILUFactor_SeqBAIJ,
1358        0,
1359        0,
1360        MatGetSubMatrices_SeqBAIJ,
1361        MatIncreaseOverlap_SeqBAIJ,
1362        MatGetValues_SeqBAIJ,
1363        0,
1364        MatPrintHelp_SeqBAIJ,
1365        MatScale_SeqBAIJ,
1366        0,
1367        0,
1368        0,
1369        MatGetBlockSize_SeqBAIJ,
1370        MatGetRowIJ_SeqBAIJ,
1371        MatRestoreRowIJ_SeqBAIJ,
1372        0,
1373        0,
1374        0,
1375        0,
1376        0,
1377        0,
1378        MatSetValuesBlocked_SeqBAIJ,
1379        MatGetSubMatrix_SeqBAIJ,
1380        MatDestroy_SeqBAIJ,
1381        MatView_SeqBAIJ,
1382        MatGetPetscMaps_Petsc,
1383        0,
1384        0,
1385        0,
1386        0,
1387        0,
1388        0,
1389        MatGetRowMax_SeqBAIJ,
1390        MatConvert_Basic};
1391 
1392 EXTERN_C_BEGIN
1393 #undef __FUNCT__
1394 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1395 int MatStoreValues_SeqBAIJ(Mat mat)
1396 {
1397   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1398   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1399   int         ierr;
1400 
1401   PetscFunctionBegin;
1402   if (aij->nonew != 1) {
1403     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1404   }
1405 
1406   /* allocate space for values if not already there */
1407   if (!aij->saved_values) {
1408     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1409   }
1410 
1411   /* copy values over */
1412   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 EXTERN_C_END
1416 
1417 EXTERN_C_BEGIN
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1420 int MatRetrieveValues_SeqBAIJ(Mat mat)
1421 {
1422   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1423   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1424 
1425   PetscFunctionBegin;
1426   if (aij->nonew != 1) {
1427     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1428   }
1429   if (!aij->saved_values) {
1430     SETERRQ(1,"Must call MatStoreValues(A);first");
1431   }
1432 
1433   /* copy values over */
1434   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1435   PetscFunctionReturn(0);
1436 }
1437 EXTERN_C_END
1438 
1439 EXTERN_C_BEGIN
1440 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1441 EXTERN_C_END
1442 
1443 EXTERN_C_BEGIN
1444 #undef __FUNCT__
1445 #define __FUNCT__ "MatCreate_SeqBAIJ"
1446 int MatCreate_SeqBAIJ(Mat B)
1447 {
1448   int         ierr,size;
1449   Mat_SeqBAIJ *b;
1450 
1451   PetscFunctionBegin;
1452   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1453   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1454 
1455   B->m = B->M = PetscMax(B->m,B->M);
1456   B->n = B->N = PetscMax(B->n,B->N);
1457   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1458   B->data = (void*)b;
1459   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1460   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1461   B->factor           = 0;
1462   B->lupivotthreshold = 1.0;
1463   B->mapping          = 0;
1464   b->row              = 0;
1465   b->col              = 0;
1466   b->icol             = 0;
1467   b->reallocs         = 0;
1468   b->saved_values     = 0;
1469 #if defined(PETSC_USE_MAT_SINGLE)
1470   b->setvalueslen     = 0;
1471   b->setvaluescopy    = PETSC_NULL;
1472 #endif
1473   b->single_precision_solves = PETSC_FALSE;
1474 
1475   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1476   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1477 
1478   b->sorted           = PETSC_FALSE;
1479   b->roworiented      = PETSC_TRUE;
1480   b->nonew            = 0;
1481   b->diag             = 0;
1482   b->solve_work       = 0;
1483   b->mult_work        = 0;
1484   b->spptr            = 0;
1485   B->info.nz_unneeded = (PetscReal)b->maxnz;
1486   b->keepzeroedrows   = PETSC_FALSE;
1487 
1488   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1489                                      "MatStoreValues_SeqBAIJ",
1490                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1491   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1492                                      "MatRetrieveValues_SeqBAIJ",
1493                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1494   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1495                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1496                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1497   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1498                                      "MatConvert_SeqBAIJ_SeqAIJ",
1499                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1500   PetscFunctionReturn(0);
1501 }
1502 EXTERN_C_END
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1506 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1507 {
1508   Mat         C;
1509   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1510   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1511 
1512   PetscFunctionBegin;
1513   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1514 
1515   *B = 0;
1516   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1517   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1518   c    = (Mat_SeqBAIJ*)C->data;
1519 
1520   c->bs         = a->bs;
1521   c->bs2        = a->bs2;
1522   c->mbs        = a->mbs;
1523   c->nbs        = a->nbs;
1524   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1525 
1526   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1527   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1528   for (i=0; i<mbs; i++) {
1529     c->imax[i] = a->imax[i];
1530     c->ilen[i] = a->ilen[i];
1531   }
1532 
1533   /* allocate the matrix space */
1534   c->singlemalloc = PETSC_TRUE;
1535   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1536   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1537   c->j = (int*)(c->a + nz*bs2);
1538   c->i = c->j + nz;
1539   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1540   if (mbs > 0) {
1541     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1542     if (cpvalues == MAT_COPY_VALUES) {
1543       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1544     } else {
1545       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1546     }
1547   }
1548 
1549   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1550   c->sorted      = a->sorted;
1551   c->roworiented = a->roworiented;
1552   c->nonew       = a->nonew;
1553 
1554   if (a->diag) {
1555     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1556     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1557     for (i=0; i<mbs; i++) {
1558       c->diag[i] = a->diag[i];
1559     }
1560   } else c->diag        = 0;
1561   c->nz                 = a->nz;
1562   c->maxnz              = a->maxnz;
1563   c->solve_work         = 0;
1564   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1565   c->mult_work          = 0;
1566   C->preallocated       = PETSC_TRUE;
1567   C->assembled          = PETSC_TRUE;
1568   *B = C;
1569   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1570   PetscFunctionReturn(0);
1571 }
1572 
1573 EXTERN_C_BEGIN
1574 #undef __FUNCT__
1575 #define __FUNCT__ "MatLoad_SeqBAIJ"
1576 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1577 {
1578   Mat_SeqBAIJ  *a;
1579   Mat          B;
1580   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1581   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1582   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1583   int          *masked,nmask,tmp,bs2,ishift;
1584   PetscScalar  *aa;
1585   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1586 
1587   PetscFunctionBegin;
1588   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1589   bs2  = bs*bs;
1590 
1591   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1592   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1593   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1594   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1595   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1596   M = header[1]; N = header[2]; nz = header[3];
1597 
1598   if (header[3] < 0) {
1599     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1600   }
1601 
1602   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1603 
1604   /*
1605      This code adds extra rows to make sure the number of rows is
1606     divisible by the blocksize
1607   */
1608   mbs        = M/bs;
1609   extra_rows = bs - M + bs*(mbs);
1610   if (extra_rows == bs) extra_rows = 0;
1611   else                  mbs++;
1612   if (extra_rows) {
1613     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1614   }
1615 
1616   /* read in row lengths */
1617   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1618   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1619   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1620 
1621   /* read in column indices */
1622   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1623   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1624   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1625 
1626   /* loop over row lengths determining block row lengths */
1627   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1628   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1629   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1630   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1631   masked   = mask + mbs;
1632   rowcount = 0; nzcount = 0;
1633   for (i=0; i<mbs; i++) {
1634     nmask = 0;
1635     for (j=0; j<bs; j++) {
1636       kmax = rowlengths[rowcount];
1637       for (k=0; k<kmax; k++) {
1638         tmp = jj[nzcount++]/bs;
1639         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1640       }
1641       rowcount++;
1642     }
1643     browlengths[i] += nmask;
1644     /* zero out the mask elements we set */
1645     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1646   }
1647 
1648   /* create our matrix */
1649   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1650   B = *A;
1651   a = (Mat_SeqBAIJ*)B->data;
1652 
1653   /* set matrix "i" values */
1654   a->i[0] = 0;
1655   for (i=1; i<= mbs; i++) {
1656     a->i[i]      = a->i[i-1] + browlengths[i-1];
1657     a->ilen[i-1] = browlengths[i-1];
1658   }
1659   a->nz         = 0;
1660   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1661 
1662   /* read in nonzero values */
1663   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1664   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1665   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1666 
1667   /* set "a" and "j" values into matrix */
1668   nzcount = 0; jcount = 0;
1669   for (i=0; i<mbs; i++) {
1670     nzcountb = nzcount;
1671     nmask    = 0;
1672     for (j=0; j<bs; j++) {
1673       kmax = rowlengths[i*bs+j];
1674       for (k=0; k<kmax; k++) {
1675         tmp = jj[nzcount++]/bs;
1676 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1677       }
1678     }
1679     /* sort the masked values */
1680     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1681 
1682     /* set "j" values into matrix */
1683     maskcount = 1;
1684     for (j=0; j<nmask; j++) {
1685       a->j[jcount++]  = masked[j];
1686       mask[masked[j]] = maskcount++;
1687     }
1688     /* set "a" values into matrix */
1689     ishift = bs2*a->i[i];
1690     for (j=0; j<bs; j++) {
1691       kmax = rowlengths[i*bs+j];
1692       for (k=0; k<kmax; k++) {
1693         tmp       = jj[nzcountb]/bs ;
1694         block     = mask[tmp] - 1;
1695         point     = jj[nzcountb] - bs*tmp;
1696         idx       = ishift + bs2*block + j + bs*point;
1697         a->a[idx] = (MatScalar)aa[nzcountb++];
1698       }
1699     }
1700     /* zero out the mask elements we set */
1701     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1702   }
1703   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1704 
1705   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1706   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1707   ierr = PetscFree(aa);CHKERRQ(ierr);
1708   ierr = PetscFree(jj);CHKERRQ(ierr);
1709   ierr = PetscFree(mask);CHKERRQ(ierr);
1710 
1711   B->assembled = PETSC_TRUE;
1712 
1713   ierr = MatView_Private(B);CHKERRQ(ierr);
1714   PetscFunctionReturn(0);
1715 }
1716 EXTERN_C_END
1717 
1718 #undef __FUNCT__
1719 #define __FUNCT__ "MatCreateSeqBAIJ"
1720 /*@C
1721    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1722    compressed row) format.  For good matrix assembly performance the
1723    user should preallocate the matrix storage by setting the parameter nz
1724    (or the array nnz).  By setting these parameters accurately, performance
1725    during matrix assembly can be increased by more than a factor of 50.
1726 
1727    Collective on MPI_Comm
1728 
1729    Input Parameters:
1730 +  comm - MPI communicator, set to PETSC_COMM_SELF
1731 .  bs - size of block
1732 .  m - number of rows
1733 .  n - number of columns
1734 .  nz - number of nonzero blocks  per block row (same for all rows)
1735 -  nnz - array containing the number of nonzero blocks in the various block rows
1736          (possibly different for each block row) or PETSC_NULL
1737 
1738    Output Parameter:
1739 .  A - the matrix
1740 
1741    Options Database Keys:
1742 .   -mat_no_unroll - uses code that does not unroll the loops in the
1743                      block calculations (much slower)
1744 .    -mat_block_size - size of the blocks to use
1745 
1746    Level: intermediate
1747 
1748    Notes:
1749    A nonzero block is any block that as 1 or more nonzeros in it
1750 
1751    The block AIJ format is fully compatible with standard Fortran 77
1752    storage.  That is, the stored row and column indices can begin at
1753    either one (as in Fortran) or zero.  See the users' manual for details.
1754 
1755    Specify the preallocated storage with either nz or nnz (not both).
1756    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1757    allocation.  For additional details, see the users manual chapter on
1758    matrices.
1759 
1760 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1761 @*/
1762 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1763 {
1764   int ierr;
1765 
1766   PetscFunctionBegin;
1767   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1768   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
1769   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1770   PetscFunctionReturn(0);
1771 }
1772 
1773 #undef __FUNCT__
1774 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
1775 /*@C
1776    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1777    per row in the matrix. For good matrix assembly performance the
1778    user should preallocate the matrix storage by setting the parameter nz
1779    (or the array nnz).  By setting these parameters accurately, performance
1780    during matrix assembly can be increased by more than a factor of 50.
1781 
1782    Collective on MPI_Comm
1783 
1784    Input Parameters:
1785 +  A - the matrix
1786 .  bs - size of block
1787 .  nz - number of block nonzeros per block row (same for all rows)
1788 -  nnz - array containing the number of block nonzeros in the various block rows
1789          (possibly different for each block row) or PETSC_NULL
1790 
1791    Options Database Keys:
1792 .   -mat_no_unroll - uses code that does not unroll the loops in the
1793                      block calculations (much slower)
1794 .    -mat_block_size - size of the blocks to use
1795 
1796    Level: intermediate
1797 
1798    Notes:
1799    The block AIJ format is fully compatible with standard Fortran 77
1800    storage.  That is, the stored row and column indices can begin at
1801    either one (as in Fortran) or zero.  See the users' manual for details.
1802 
1803    Specify the preallocated storage with either nz or nnz (not both).
1804    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1805    allocation.  For additional details, see the users manual chapter on
1806    matrices.
1807 
1808 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1809 @*/
1810 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1811 {
1812   Mat_SeqBAIJ *b;
1813   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1814   PetscTruth  flg;
1815 
1816   PetscFunctionBegin;
1817   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr);
1818   if (!flg) PetscFunctionReturn(0);
1819 
1820   B->preallocated = PETSC_TRUE;
1821   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1822   if (nnz && newbs != bs) {
1823     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1824   }
1825   bs   = newbs;
1826 
1827   mbs  = B->m/bs;
1828   nbs  = B->n/bs;
1829   bs2  = bs*bs;
1830 
1831   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1832     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1833   }
1834 
1835   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1836   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1837   if (nnz) {
1838     for (i=0; i<mbs; i++) {
1839       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1840       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1841     }
1842   }
1843 
1844   b       = (Mat_SeqBAIJ*)B->data;
1845   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1846   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1847   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1848   if (!flg) {
1849     switch (bs) {
1850     case 1:
1851       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1852       B->ops->mult            = MatMult_SeqBAIJ_1;
1853       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1854       break;
1855     case 2:
1856       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1857       B->ops->mult            = MatMult_SeqBAIJ_2;
1858       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1859       break;
1860     case 3:
1861       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1862       B->ops->mult            = MatMult_SeqBAIJ_3;
1863       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1864       break;
1865     case 4:
1866       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1867       B->ops->mult            = MatMult_SeqBAIJ_4;
1868       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1869       break;
1870     case 5:
1871       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1872       B->ops->mult            = MatMult_SeqBAIJ_5;
1873       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1874       break;
1875     case 6:
1876       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1877       B->ops->mult            = MatMult_SeqBAIJ_6;
1878       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1879       break;
1880     case 7:
1881       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1882       B->ops->mult            = MatMult_SeqBAIJ_7;
1883       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1884       break;
1885     default:
1886       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1887       B->ops->mult            = MatMult_SeqBAIJ_N;
1888       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1889       break;
1890     }
1891   }
1892   b->bs      = bs;
1893   b->mbs     = mbs;
1894   b->nbs     = nbs;
1895   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1896   if (!nnz) {
1897     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1898     else if (nz <= 0)        nz = 1;
1899     for (i=0; i<mbs; i++) b->imax[i] = nz;
1900     nz = nz*mbs;
1901   } else {
1902     nz = 0;
1903     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1904   }
1905 
1906   /* allocate the matrix space */
1907   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1908   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1909   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1910   b->j  = (int*)(b->a + nz*bs2);
1911   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1912   b->i  = b->j + nz;
1913   b->singlemalloc = PETSC_TRUE;
1914 
1915   b->i[0] = 0;
1916   for (i=1; i<mbs+1; i++) {
1917     b->i[i] = b->i[i-1] + b->imax[i-1];
1918   }
1919 
1920   /* b->ilen will count nonzeros in each block row so far. */
1921   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1922   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1923   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1924 
1925   b->bs               = bs;
1926   b->bs2              = bs2;
1927   b->mbs              = mbs;
1928   b->nz               = 0;
1929   b->maxnz            = nz*bs2;
1930   B->info.nz_unneeded = (PetscReal)b->maxnz;
1931   PetscFunctionReturn(0);
1932 }
1933 
1934