xref: /petsc/src/mat/impls/baij/seq/baij.c (revision ea709b57ddb8cc304f11df2e9a39d6a8fdb28b53)
1 /*$Id: baij.c,v 1.244 2001/08/06 21:15:36 bsmith Exp balay $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "petscsys.h"                     /*I "petscmat.h" I*/
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 
12 /*  UGLY, ugly, ugly
13    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
14    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
15    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
16    converts the entries into single precision and then calls ..._MatScalar() to put them
17    into the single precision data structures.
18 */
19 #if defined(PETSC_USE_MAT_SINGLE)
20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
21 #else
22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
23 #endif
24 
25 #define CHUNKSIZE  10
26 
27 /*
28      Checks for missing diagonals
29 */
30 #undef __FUNCT__
31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
32 int MatMissingDiagonal_SeqBAIJ(Mat A)
33 {
34   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
35   int         *diag,*jj = a->j,i,ierr;
36 
37   PetscFunctionBegin;
38   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
39   diag = a->diag;
40   for (i=0; i<a->mbs; i++) {
41     if (jj[diag[i]] != i) {
42       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
43     }
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
50 int MatMarkDiagonal_SeqBAIJ(Mat A)
51 {
52   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
53   int         i,j,*diag,m = a->mbs,ierr;
54 
55   PetscFunctionBegin;
56   if (a->diag) PetscFunctionReturn(0);
57 
58   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
59   PetscLogObjectMemory(A,(m+1)*sizeof(int));
60   for (i=0; i<m; i++) {
61     diag[i] = a->i[i+1];
62     for (j=a->i[i]; j<a->i[i+1]; j++) {
63       if (a->j[j] == i) {
64         diag[i] = j;
65         break;
66       }
67     }
68   }
69   a->diag = diag;
70   PetscFunctionReturn(0);
71 }
72 
73 
74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
79 {
80   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
81   int         ierr,n = a->mbs,i;
82 
83   PetscFunctionBegin;
84   *nn = n;
85   if (!ia) PetscFunctionReturn(0);
86   if (symmetric) {
87     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
88   } else if (oshift == 1) {
89     /* temporarily add 1 to i and j indices */
90     int nz = a->i[n];
91     for (i=0; i<nz; i++) a->j[i]++;
92     for (i=0; i<n+1; i++) a->i[i]++;
93     *ia = a->i; *ja = a->j;
94   } else {
95     *ia = a->i; *ja = a->j;
96   }
97 
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
104 {
105   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
106   int         i,n = a->mbs,ierr;
107 
108   PetscFunctionBegin;
109   if (!ia) PetscFunctionReturn(0);
110   if (symmetric) {
111     ierr = PetscFree(*ia);CHKERRQ(ierr);
112     ierr = PetscFree(*ja);CHKERRQ(ierr);
113   } else if (oshift == 1) {
114     int nz = a->i[n]-1;
115     for (i=0; i<nz; i++) a->j[i]--;
116     for (i=0; i<n+1; i++) a->i[i]--;
117   }
118   PetscFunctionReturn(0);
119 }
120 
121 #undef __FUNCT__
122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
124 {
125   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
126 
127   PetscFunctionBegin;
128   *bs = baij->bs;
129   PetscFunctionReturn(0);
130 }
131 
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatDestroy_SeqBAIJ"
135 int MatDestroy_SeqBAIJ(Mat A)
136 {
137   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
138   int         ierr;
139 
140   PetscFunctionBegin;
141 #if defined(PETSC_USE_LOG)
142   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
143 #endif
144   ierr = PetscFree(a->a);CHKERRQ(ierr);
145   if (!a->singlemalloc) {
146     ierr = PetscFree(a->i);CHKERRQ(ierr);
147     ierr = PetscFree(a->j);CHKERRQ(ierr);
148   }
149   if (a->row) {
150     ierr = ISDestroy(a->row);CHKERRQ(ierr);
151   }
152   if (a->col) {
153     ierr = ISDestroy(a->col);CHKERRQ(ierr);
154   }
155   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
156   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
157   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
158   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
159   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
160   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
161   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
162 #if defined(PETSC_USE_MAT_SINGLE)
163   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
164 #endif
165   ierr = PetscFree(a);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatSetOption_SeqBAIJ"
171 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
172 {
173   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
174 
175   PetscFunctionBegin;
176   switch (op) {
177   case MAT_ROW_ORIENTED:
178     a->roworiented    = PETSC_TRUE;
179     break;
180   case MAT_COLUMN_ORIENTED:
181     a->roworiented    = PETSC_FALSE;
182     break;
183   case MAT_COLUMNS_SORTED:
184     a->sorted         = PETSC_TRUE;
185     break;
186   case MAT_COLUMNS_UNSORTED:
187     a->sorted         = PETSC_FALSE;
188     break;
189   case MAT_KEEP_ZEROED_ROWS:
190     a->keepzeroedrows = PETSC_TRUE;
191     break;
192   case MAT_NO_NEW_NONZERO_LOCATIONS:
193     a->nonew          = 1;
194     break;
195   case MAT_NEW_NONZERO_LOCATION_ERR:
196     a->nonew          = -1;
197     break;
198   case MAT_NEW_NONZERO_ALLOCATION_ERR:
199     a->nonew          = -2;
200     break;
201   case MAT_YES_NEW_NONZERO_LOCATIONS:
202     a->nonew          = 0;
203     break;
204   case MAT_ROWS_SORTED:
205   case MAT_ROWS_UNSORTED:
206   case MAT_YES_NEW_DIAGONALS:
207   case MAT_IGNORE_OFF_PROC_ENTRIES:
208   case MAT_USE_HASH_TABLE:
209     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
210     break;
211   case MAT_NO_NEW_DIAGONALS:
212     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
213     break;
214   case MAT_USE_SINGLE_PRECISION_SOLVES:
215     if (a->bs==4) {
216       PetscTruth sse_enabled_local,sse_enabled_global;
217       int        ierr;
218 
219       sse_enabled_local  = PETSC_FALSE;
220       sse_enabled_global = PETSC_FALSE;
221 
222       ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr);
223 #if defined(PETSC_HAVE_SSE)
224       if (sse_enabled_local) {
225           a->single_precision_solves = PETSC_TRUE;
226           A->ops->solve              = MatSolve_SeqBAIJ_Update;
227           A->ops->solvetranspose     = MatSolveTranspose_SeqBAIJ_Update;
228           PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n");
229           break;
230       } else {
231         PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
232       }
233 #else
234       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
235 #endif
236     }
237     break;
238   default:
239     SETERRQ(PETSC_ERR_SUP,"unknown option");
240   }
241   PetscFunctionReturn(0);
242 }
243 
244 #undef __FUNCT__
245 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ"
246 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
247 {
248   PetscFunctionBegin;
249   if (m) *m = 0;
250   if (n) *n = A->m;
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatGetRow_SeqBAIJ"
256 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
257 {
258   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
259   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
260   MatScalar    *aa,*aa_i;
261   PetscScalar  *v_i;
262 
263   PetscFunctionBegin;
264   bs  = a->bs;
265   ai  = a->i;
266   aj  = a->j;
267   aa  = a->a;
268   bs2 = a->bs2;
269 
270   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
271 
272   bn  = row/bs;   /* Block number */
273   bp  = row % bs; /* Block Position */
274   M   = ai[bn+1] - ai[bn];
275   *nz = bs*M;
276 
277   if (v) {
278     *v = 0;
279     if (*nz) {
280       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
281       for (i=0; i<M; i++) { /* for each block in the block row */
282         v_i  = *v + i*bs;
283         aa_i = aa + bs2*(ai[bn] + i);
284         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
285       }
286     }
287   }
288 
289   if (idx) {
290     *idx = 0;
291     if (*nz) {
292       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
293       for (i=0; i<M; i++) { /* for each block in the block row */
294         idx_i = *idx + i*bs;
295         itmp  = bs*aj[ai[bn] + i];
296         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
297       }
298     }
299   }
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
305 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
306 {
307   int ierr;
308 
309   PetscFunctionBegin;
310   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
311   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatTranspose_SeqBAIJ"
317 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
318 {
319   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
320   Mat         C;
321   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
322   int         *rows,*cols,bs2=a->bs2;
323   PetscScalar *array;
324 
325   PetscFunctionBegin;
326   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
327   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
328   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
329 
330 #if defined(PETSC_USE_MAT_SINGLE)
331   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
332   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
333 #else
334   array = a->a;
335 #endif
336 
337   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
338   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
339   ierr = PetscFree(col);CHKERRQ(ierr);
340   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
341   cols = rows + bs;
342   for (i=0; i<mbs; i++) {
343     cols[0] = i*bs;
344     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
345     len = ai[i+1] - ai[i];
346     for (j=0; j<len; j++) {
347       rows[0] = (*aj++)*bs;
348       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
349       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
350       array += bs2;
351     }
352   }
353   ierr = PetscFree(rows);CHKERRQ(ierr);
354 #if defined(PETSC_USE_MAT_SINGLE)
355   ierr = PetscFree(array);
356 #endif
357 
358   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
359   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
360 
361   if (B) {
362     *B = C;
363   } else {
364     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
365   }
366   PetscFunctionReturn(0);
367 }
368 
369 #undef __FUNCT__
370 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
371 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
372 {
373   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
374   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
375   PetscScalar *aa;
376   FILE        *file;
377 
378   PetscFunctionBegin;
379   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
380   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
381   col_lens[0] = MAT_COOKIE;
382 
383   col_lens[1] = A->m;
384   col_lens[2] = A->n;
385   col_lens[3] = a->nz*bs2;
386 
387   /* store lengths of each row and write (including header) to file */
388   count = 0;
389   for (i=0; i<a->mbs; i++) {
390     for (j=0; j<bs; j++) {
391       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
392     }
393   }
394   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
395   ierr = PetscFree(col_lens);CHKERRQ(ierr);
396 
397   /* store column indices (zero start index) */
398   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
399   count = 0;
400   for (i=0; i<a->mbs; i++) {
401     for (j=0; j<bs; j++) {
402       for (k=a->i[i]; k<a->i[i+1]; k++) {
403         for (l=0; l<bs; l++) {
404           jj[count++] = bs*a->j[k] + l;
405         }
406       }
407     }
408   }
409   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
410   ierr = PetscFree(jj);CHKERRQ(ierr);
411 
412   /* store nonzero values */
413   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
414   count = 0;
415   for (i=0; i<a->mbs; i++) {
416     for (j=0; j<bs; j++) {
417       for (k=a->i[i]; k<a->i[i+1]; k++) {
418         for (l=0; l<bs; l++) {
419           aa[count++] = a->a[bs2*k + l*bs + j];
420         }
421       }
422     }
423   }
424   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
425   ierr = PetscFree(aa);CHKERRQ(ierr);
426 
427   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
428   if (file) {
429     fprintf(file,"-matload_block_size %d\n",a->bs);
430   }
431   PetscFunctionReturn(0);
432 }
433 
434 #undef __FUNCT__
435 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
436 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
437 {
438   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
439   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
440   PetscViewerFormat format;
441 
442   PetscFunctionBegin;
443   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
444   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
445     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
446   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
447     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
448   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
449     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
450     for (i=0; i<a->mbs; i++) {
451       for (j=0; j<bs; j++) {
452         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
453         for (k=a->i[i]; k<a->i[i+1]; k++) {
454           for (l=0; l<bs; l++) {
455 #if defined(PETSC_USE_COMPLEX)
456             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
457               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l,
458                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
459             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
460               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l,
461                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
462             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
463               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
464             }
465 #else
466             if (a->a[bs2*k + l*bs + j] != 0.0) {
467               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
468             }
469 #endif
470           }
471         }
472         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
473       }
474     }
475     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
476   } else {
477     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
478     for (i=0; i<a->mbs; i++) {
479       for (j=0; j<bs; j++) {
480         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
481         for (k=a->i[i]; k<a->i[i+1]; k++) {
482           for (l=0; l<bs; l++) {
483 #if defined(PETSC_USE_COMPLEX)
484             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
485               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
486                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
487             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
488               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
489                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
490             } else {
491               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
492             }
493 #else
494             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
495 #endif
496           }
497         }
498         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
499       }
500     }
501     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
502   }
503   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
504   PetscFunctionReturn(0);
505 }
506 
507 #undef __FUNCT__
508 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
509 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
510 {
511   Mat          A = (Mat) Aa;
512   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
513   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
514   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
515   MatScalar    *aa;
516   PetscViewer  viewer;
517 
518   PetscFunctionBegin;
519 
520   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
521   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
522 
523   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
524 
525   /* loop over matrix elements drawing boxes */
526   color = PETSC_DRAW_BLUE;
527   for (i=0,row=0; i<mbs; i++,row+=bs) {
528     for (j=a->i[i]; j<a->i[i+1]; j++) {
529       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
530       x_l = a->j[j]*bs; x_r = x_l + 1.0;
531       aa = a->a + j*bs2;
532       for (k=0; k<bs; k++) {
533         for (l=0; l<bs; l++) {
534           if (PetscRealPart(*aa++) >=  0.) continue;
535           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
536         }
537       }
538     }
539   }
540   color = PETSC_DRAW_CYAN;
541   for (i=0,row=0; i<mbs; i++,row+=bs) {
542     for (j=a->i[i]; j<a->i[i+1]; j++) {
543       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
544       x_l = a->j[j]*bs; x_r = x_l + 1.0;
545       aa = a->a + j*bs2;
546       for (k=0; k<bs; k++) {
547         for (l=0; l<bs; l++) {
548           if (PetscRealPart(*aa++) != 0.) continue;
549           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
550         }
551       }
552     }
553   }
554 
555   color = PETSC_DRAW_RED;
556   for (i=0,row=0; i<mbs; i++,row+=bs) {
557     for (j=a->i[i]; j<a->i[i+1]; j++) {
558       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
559       x_l = a->j[j]*bs; x_r = x_l + 1.0;
560       aa = a->a + j*bs2;
561       for (k=0; k<bs; k++) {
562         for (l=0; l<bs; l++) {
563           if (PetscRealPart(*aa++) <= 0.) continue;
564           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
565         }
566       }
567     }
568   }
569   PetscFunctionReturn(0);
570 }
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
574 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
575 {
576   int          ierr;
577   PetscReal    xl,yl,xr,yr,w,h;
578   PetscDraw    draw;
579   PetscTruth   isnull;
580 
581   PetscFunctionBegin;
582 
583   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
584   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
585 
586   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
587   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
588   xr += w;    yr += h;  xl = -w;     yl = -h;
589   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
590   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
591   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
592   PetscFunctionReturn(0);
593 }
594 
595 #undef __FUNCT__
596 #define __FUNCT__ "MatView_SeqBAIJ"
597 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
598 {
599   int        ierr;
600   PetscTruth issocket,isascii,isbinary,isdraw;
601 
602   PetscFunctionBegin;
603   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
604   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
605   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
606   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
607   if (issocket) {
608     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
609   } else if (isascii){
610     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
611   } else if (isbinary) {
612     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
613   } else if (isdraw) {
614     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
615   } else {
616     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
617   }
618   PetscFunctionReturn(0);
619 }
620 
621 
622 #undef __FUNCT__
623 #define __FUNCT__ "MatGetValues_SeqBAIJ"
624 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
625 {
626   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
627   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
628   int        *ai = a->i,*ailen = a->ilen;
629   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
630   MatScalar  *ap,*aa = a->a,zero = 0.0;
631 
632   PetscFunctionBegin;
633   for (k=0; k<m; k++) { /* loop over rows */
634     row  = im[k]; brow = row/bs;
635     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
636     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
637     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
638     nrow = ailen[brow];
639     for (l=0; l<n; l++) { /* loop over columns */
640       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
641       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
642       col  = in[l] ;
643       bcol = col/bs;
644       cidx = col%bs;
645       ridx = row%bs;
646       high = nrow;
647       low  = 0; /* assume unsorted */
648       while (high-low > 5) {
649         t = (low+high)/2;
650         if (rp[t] > bcol) high = t;
651         else             low  = t;
652       }
653       for (i=low; i<high; i++) {
654         if (rp[i] > bcol) break;
655         if (rp[i] == bcol) {
656           *v++ = ap[bs2*i+bs*cidx+ridx];
657           goto finished;
658         }
659       }
660       *v++ = zero;
661       finished:;
662     }
663   }
664   PetscFunctionReturn(0);
665 }
666 
667 #if defined(PETSC_USE_MAT_SINGLE)
668 #undef __FUNCT__
669 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
670 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
671 {
672   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
673   int         ierr,i,N = m*n*b->bs2;
674   MatScalar   *vsingle;
675 
676   PetscFunctionBegin;
677   if (N > b->setvalueslen) {
678     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
679     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
680     b->setvalueslen  = N;
681   }
682   vsingle = b->setvaluescopy;
683   for (i=0; i<N; i++) {
684     vsingle[i] = v[i];
685   }
686   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 #endif
690 
691 
692 #undef __FUNCT__
693 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
694 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
695 {
696   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
697   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
698   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
699   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
700   PetscTruth  roworiented=a->roworiented;
701   MatScalar   *value = v,*ap,*aa = a->a,*bap;
702 
703   PetscFunctionBegin;
704   if (roworiented) {
705     stepval = (n-1)*bs;
706   } else {
707     stepval = (m-1)*bs;
708   }
709   for (k=0; k<m; k++) { /* loop over added rows */
710     row  = im[k];
711     if (row < 0) continue;
712 #if defined(PETSC_USE_BOPT_g)
713     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
714 #endif
715     rp   = aj + ai[row];
716     ap   = aa + bs2*ai[row];
717     rmax = imax[row];
718     nrow = ailen[row];
719     low  = 0;
720     for (l=0; l<n; l++) { /* loop over added columns */
721       if (in[l] < 0) continue;
722 #if defined(PETSC_USE_BOPT_g)
723       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
724 #endif
725       col = in[l];
726       if (roworiented) {
727         value = v + k*(stepval+bs)*bs + l*bs;
728       } else {
729         value = v + l*(stepval+bs)*bs + k*bs;
730       }
731       if (!sorted) low = 0; high = nrow;
732       while (high-low > 7) {
733         t = (low+high)/2;
734         if (rp[t] > col) high = t;
735         else             low  = t;
736       }
737       for (i=low; i<high; i++) {
738         if (rp[i] > col) break;
739         if (rp[i] == col) {
740           bap  = ap +  bs2*i;
741           if (roworiented) {
742             if (is == ADD_VALUES) {
743               for (ii=0; ii<bs; ii++,value+=stepval) {
744                 for (jj=ii; jj<bs2; jj+=bs) {
745                   bap[jj] += *value++;
746                 }
747               }
748             } else {
749               for (ii=0; ii<bs; ii++,value+=stepval) {
750                 for (jj=ii; jj<bs2; jj+=bs) {
751                   bap[jj] = *value++;
752                 }
753               }
754             }
755           } else {
756             if (is == ADD_VALUES) {
757               for (ii=0; ii<bs; ii++,value+=stepval) {
758                 for (jj=0; jj<bs; jj++) {
759                   *bap++ += *value++;
760                 }
761               }
762             } else {
763               for (ii=0; ii<bs; ii++,value+=stepval) {
764                 for (jj=0; jj<bs; jj++) {
765                   *bap++  = *value++;
766                 }
767               }
768             }
769           }
770           goto noinsert2;
771         }
772       }
773       if (nonew == 1) goto noinsert2;
774       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
775       if (nrow >= rmax) {
776         /* there is no extra room in row, therefore enlarge */
777         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
778         MatScalar *new_a;
779 
780         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
781 
782         /* malloc new storage space */
783         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
784 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
785         new_j   = (int*)(new_a + bs2*new_nz);
786         new_i   = new_j + new_nz;
787 
788         /* copy over old data into new slots */
789         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
790         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
791         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
792         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
793         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
794         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
795         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
796         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
797         /* free up old matrix storage */
798         ierr = PetscFree(a->a);CHKERRQ(ierr);
799         if (!a->singlemalloc) {
800           ierr = PetscFree(a->i);CHKERRQ(ierr);
801           ierr = PetscFree(a->j);CHKERRQ(ierr);
802         }
803         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
804         a->singlemalloc = PETSC_TRUE;
805 
806         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
807         rmax = imax[row] = imax[row] + CHUNKSIZE;
808         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
809         a->maxnz += bs2*CHUNKSIZE;
810         a->reallocs++;
811         a->nz++;
812       }
813       N = nrow++ - 1;
814       /* shift up all the later entries in this row */
815       for (ii=N; ii>=i; ii--) {
816         rp[ii+1] = rp[ii];
817         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
818       }
819       if (N >= i) {
820         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
821       }
822       rp[i] = col;
823       bap   = ap +  bs2*i;
824       if (roworiented) {
825         for (ii=0; ii<bs; ii++,value+=stepval) {
826           for (jj=ii; jj<bs2; jj+=bs) {
827             bap[jj] = *value++;
828           }
829         }
830       } else {
831         for (ii=0; ii<bs; ii++,value+=stepval) {
832           for (jj=0; jj<bs; jj++) {
833             *bap++  = *value++;
834           }
835         }
836       }
837       noinsert2:;
838       low = i;
839     }
840     ailen[row] = nrow;
841   }
842   PetscFunctionReturn(0);
843 }
844 
845 
846 #undef __FUNCT__
847 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
848 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
849 {
850   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
851   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
852   int        m = A->m,*ip,N,*ailen = a->ilen;
853   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
854   MatScalar  *aa = a->a,*ap;
855 
856   PetscFunctionBegin;
857   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
858 
859   if (m) rmax = ailen[0];
860   for (i=1; i<mbs; i++) {
861     /* move each row back by the amount of empty slots (fshift) before it*/
862     fshift += imax[i-1] - ailen[i-1];
863     rmax   = PetscMax(rmax,ailen[i]);
864     if (fshift) {
865       ip = aj + ai[i]; ap = aa + bs2*ai[i];
866       N = ailen[i];
867       for (j=0; j<N; j++) {
868         ip[j-fshift] = ip[j];
869         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
870       }
871     }
872     ai[i] = ai[i-1] + ailen[i-1];
873   }
874   if (mbs) {
875     fshift += imax[mbs-1] - ailen[mbs-1];
876     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
877   }
878   /* reset ilen and imax for each row */
879   for (i=0; i<mbs; i++) {
880     ailen[i] = imax[i] = ai[i+1] - ai[i];
881   }
882   a->nz = ai[mbs];
883 
884   /* diagonals may have moved, so kill the diagonal pointers */
885   if (fshift && a->diag) {
886     ierr = PetscFree(a->diag);CHKERRQ(ierr);
887     PetscLogObjectMemory(A,-(m+1)*sizeof(int));
888     a->diag = 0;
889   }
890   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
891            m,A->n,a->bs,fshift*bs2,a->nz*bs2);
892   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
893            a->reallocs);
894   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
895   a->reallocs          = 0;
896   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
897 
898   PetscFunctionReturn(0);
899 }
900 
901 
902 
903 /*
904    This function returns an array of flags which indicate the locations of contiguous
905    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
906    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
907    Assume: sizes should be long enough to hold all the values.
908 */
909 #undef __FUNCT__
910 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
911 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
912 {
913   int        i,j,k,row;
914   PetscTruth flg;
915 
916   PetscFunctionBegin;
917   for (i=0,j=0; i<n; j++) {
918     row = idx[i];
919     if (row%bs!=0) { /* Not the begining of a block */
920       sizes[j] = 1;
921       i++;
922     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
923       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
924       i++;
925     } else { /* Begining of the block, so check if the complete block exists */
926       flg = PETSC_TRUE;
927       for (k=1; k<bs; k++) {
928         if (row+k != idx[i+k]) { /* break in the block */
929           flg = PETSC_FALSE;
930           break;
931         }
932       }
933       if (flg == PETSC_TRUE) { /* No break in the bs */
934         sizes[j] = bs;
935         i+= bs;
936       } else {
937         sizes[j] = 1;
938         i++;
939       }
940     }
941   }
942   *bs_max = j;
943   PetscFunctionReturn(0);
944 }
945 
946 #undef __FUNCT__
947 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
948 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag)
949 {
950   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
951   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
952   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
953   PetscScalar zero = 0.0;
954   MatScalar   *aa;
955 
956   PetscFunctionBegin;
957   /* Make a copy of the IS and  sort it */
958   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
959   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
960 
961   /* allocate memory for rows,sizes */
962   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
963   sizes = rows + is_n;
964 
965   /* copy IS values to rows, and sort them */
966   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
967   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
968   if (baij->keepzeroedrows) {
969     for (i=0; i<is_n; i++) { sizes[i] = 1; }
970     bs_max = is_n;
971   } else {
972     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
973   }
974   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
975 
976   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
977     row   = rows[j];
978     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
979     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
980     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
981     if (sizes[i] == bs && !baij->keepzeroedrows) {
982       if (diag) {
983         if (baij->ilen[row/bs] > 0) {
984           baij->ilen[row/bs]       = 1;
985           baij->j[baij->i[row/bs]] = row/bs;
986           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
987         }
988         /* Now insert all the diagonal values for this bs */
989         for (k=0; k<bs; k++) {
990           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
991         }
992       } else { /* (!diag) */
993         baij->ilen[row/bs] = 0;
994       } /* end (!diag) */
995     } else { /* (sizes[i] != bs) */
996 #if defined (PETSC_USE_DEBUG)
997       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
998 #endif
999       for (k=0; k<count; k++) {
1000         aa[0] =  zero;
1001         aa    += bs;
1002       }
1003       if (diag) {
1004         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1005       }
1006     }
1007   }
1008 
1009   ierr = PetscFree(rows);CHKERRQ(ierr);
1010   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1016 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
1017 {
1018   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1019   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1020   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1021   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1022   int         ridx,cidx,bs2=a->bs2,ierr;
1023   PetscTruth  roworiented=a->roworiented;
1024   MatScalar   *ap,value,*aa=a->a,*bap;
1025 
1026   PetscFunctionBegin;
1027   for (k=0; k<m; k++) { /* loop over added rows */
1028     row  = im[k]; brow = row/bs;
1029     if (row < 0) continue;
1030 #if defined(PETSC_USE_BOPT_g)
1031     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1032 #endif
1033     rp   = aj + ai[brow];
1034     ap   = aa + bs2*ai[brow];
1035     rmax = imax[brow];
1036     nrow = ailen[brow];
1037     low  = 0;
1038     for (l=0; l<n; l++) { /* loop over added columns */
1039       if (in[l] < 0) continue;
1040 #if defined(PETSC_USE_BOPT_g)
1041       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1042 #endif
1043       col = in[l]; bcol = col/bs;
1044       ridx = row % bs; cidx = col % bs;
1045       if (roworiented) {
1046         value = v[l + k*n];
1047       } else {
1048         value = v[k + l*m];
1049       }
1050       if (!sorted) low = 0; high = nrow;
1051       while (high-low > 7) {
1052         t = (low+high)/2;
1053         if (rp[t] > bcol) high = t;
1054         else              low  = t;
1055       }
1056       for (i=low; i<high; i++) {
1057         if (rp[i] > bcol) break;
1058         if (rp[i] == bcol) {
1059           bap  = ap +  bs2*i + bs*cidx + ridx;
1060           if (is == ADD_VALUES) *bap += value;
1061           else                  *bap  = value;
1062           goto noinsert1;
1063         }
1064       }
1065       if (nonew == 1) goto noinsert1;
1066       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1067       if (nrow >= rmax) {
1068         /* there is no extra room in row, therefore enlarge */
1069         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1070         MatScalar *new_a;
1071 
1072         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1073 
1074         /* Malloc new storage space */
1075         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1076 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1077         new_j   = (int*)(new_a + bs2*new_nz);
1078         new_i   = new_j + new_nz;
1079 
1080         /* copy over old data into new slots */
1081         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1082         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1083         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1084         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1085         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1086         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1087         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1088         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1089         /* free up old matrix storage */
1090         ierr = PetscFree(a->a);CHKERRQ(ierr);
1091         if (!a->singlemalloc) {
1092           ierr = PetscFree(a->i);CHKERRQ(ierr);
1093           ierr = PetscFree(a->j);CHKERRQ(ierr);
1094         }
1095         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1096         a->singlemalloc = PETSC_TRUE;
1097 
1098         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1099         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1100         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1101         a->maxnz += bs2*CHUNKSIZE;
1102         a->reallocs++;
1103         a->nz++;
1104       }
1105       N = nrow++ - 1;
1106       /* shift up all the later entries in this row */
1107       for (ii=N; ii>=i; ii--) {
1108         rp[ii+1] = rp[ii];
1109         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1110       }
1111       if (N>=i) {
1112         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1113       }
1114       rp[i]                      = bcol;
1115       ap[bs2*i + bs*cidx + ridx] = value;
1116       noinsert1:;
1117       low = i;
1118     }
1119     ailen[brow] = nrow;
1120   }
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 
1125 #undef __FUNCT__
1126 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1127 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1128 {
1129   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1130   Mat         outA;
1131   int         ierr;
1132   PetscTruth  row_identity,col_identity;
1133 
1134   PetscFunctionBegin;
1135   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1136   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1137   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1138   if (!row_identity || !col_identity) {
1139     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1140   }
1141 
1142   outA          = inA;
1143   inA->factor   = FACTOR_LU;
1144 
1145   if (!a->diag) {
1146     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1147   }
1148 
1149   a->row        = row;
1150   a->col        = col;
1151   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1152   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1153 
1154   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1155   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1156   PetscLogObjectParent(inA,a->icol);
1157 
1158   /*
1159       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1160       for ILU(0) factorization with natural ordering
1161   */
1162   if (a->bs < 8) {
1163     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1164   } else {
1165     if (!a->solve_work) {
1166       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1167       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1168     }
1169   }
1170 
1171   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1172 
1173   PetscFunctionReturn(0);
1174 }
1175 #undef __FUNCT__
1176 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1177 int MatPrintHelp_SeqBAIJ(Mat A)
1178 {
1179   static PetscTruth called = PETSC_FALSE;
1180   MPI_Comm          comm = A->comm;
1181   int               ierr;
1182 
1183   PetscFunctionBegin;
1184   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1185   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1186   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 EXTERN_C_BEGIN
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1193 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1194 {
1195   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1196   int         i,nz,n;
1197 
1198   PetscFunctionBegin;
1199   nz = baij->maxnz;
1200   n  = mat->n;
1201   for (i=0; i<nz; i++) {
1202     baij->j[i] = indices[i];
1203   }
1204   baij->nz = nz;
1205   for (i=0; i<n; i++) {
1206     baij->ilen[i] = baij->imax[i];
1207   }
1208 
1209   PetscFunctionReturn(0);
1210 }
1211 EXTERN_C_END
1212 
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1215 /*@
1216     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1217        in the matrix.
1218 
1219   Input Parameters:
1220 +  mat - the SeqBAIJ matrix
1221 -  indices - the column indices
1222 
1223   Level: advanced
1224 
1225   Notes:
1226     This can be called if you have precomputed the nonzero structure of the
1227   matrix and want to provide it to the matrix object to improve the performance
1228   of the MatSetValues() operation.
1229 
1230     You MUST have set the correct numbers of nonzeros per row in the call to
1231   MatCreateSeqBAIJ().
1232 
1233     MUST be called before any calls to MatSetValues();
1234 
1235 @*/
1236 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1237 {
1238   int ierr,(*f)(Mat,int *);
1239 
1240   PetscFunctionBegin;
1241   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1242   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr);
1243   if (f) {
1244     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1245   } else {
1246     SETERRQ(1,"Wrong type of matrix to set column indices");
1247   }
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1253 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1254 {
1255   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1256   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1257   PetscReal    atmp;
1258   PetscScalar  *x,zero = 0.0;
1259   MatScalar    *aa;
1260   int          ncols,brow,krow,kcol;
1261 
1262   PetscFunctionBegin;
1263   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1264   bs   = a->bs;
1265   aa   = a->a;
1266   ai   = a->i;
1267   aj   = a->j;
1268   mbs = a->mbs;
1269 
1270   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1271   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1272   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1273   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1274   for (i=0; i<mbs; i++) {
1275     ncols = ai[1] - ai[0]; ai++;
1276     brow  = bs*i;
1277     for (j=0; j<ncols; j++){
1278       /* bcol = bs*(*aj); */
1279       for (kcol=0; kcol<bs; kcol++){
1280         for (krow=0; krow<bs; krow++){
1281           atmp = PetscAbsScalar(*aa); aa++;
1282           row = brow + krow;    /* row index */
1283           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1284           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1285         }
1286       }
1287       aj++;
1288     }
1289   }
1290   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 #undef __FUNCT__
1295 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1296 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1297 {
1298   int        ierr;
1299 
1300   PetscFunctionBegin;
1301   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1307 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array)
1308 {
1309   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1310   PetscFunctionBegin;
1311   *array = a->a;
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 #undef __FUNCT__
1316 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1317 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array)
1318 {
1319   PetscFunctionBegin;
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 /* -------------------------------------------------------------------*/
1324 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1325        MatGetRow_SeqBAIJ,
1326        MatRestoreRow_SeqBAIJ,
1327        MatMult_SeqBAIJ_N,
1328        MatMultAdd_SeqBAIJ_N,
1329        MatMultTranspose_SeqBAIJ,
1330        MatMultTransposeAdd_SeqBAIJ,
1331        MatSolve_SeqBAIJ_N,
1332        0,
1333        0,
1334        0,
1335        MatLUFactor_SeqBAIJ,
1336        0,
1337        0,
1338        MatTranspose_SeqBAIJ,
1339        MatGetInfo_SeqBAIJ,
1340        MatEqual_SeqBAIJ,
1341        MatGetDiagonal_SeqBAIJ,
1342        MatDiagonalScale_SeqBAIJ,
1343        MatNorm_SeqBAIJ,
1344        0,
1345        MatAssemblyEnd_SeqBAIJ,
1346        0,
1347        MatSetOption_SeqBAIJ,
1348        MatZeroEntries_SeqBAIJ,
1349        MatZeroRows_SeqBAIJ,
1350        MatLUFactorSymbolic_SeqBAIJ,
1351        MatLUFactorNumeric_SeqBAIJ_N,
1352        0,
1353        0,
1354        MatSetUpPreallocation_SeqBAIJ,
1355        0,
1356        MatGetOwnershipRange_SeqBAIJ,
1357        MatILUFactorSymbolic_SeqBAIJ,
1358        0,
1359        MatGetArray_SeqBAIJ,
1360        MatRestoreArray_SeqBAIJ,
1361        MatDuplicate_SeqBAIJ,
1362        0,
1363        0,
1364        MatILUFactor_SeqBAIJ,
1365        0,
1366        0,
1367        MatGetSubMatrices_SeqBAIJ,
1368        MatIncreaseOverlap_SeqBAIJ,
1369        MatGetValues_SeqBAIJ,
1370        0,
1371        MatPrintHelp_SeqBAIJ,
1372        MatScale_SeqBAIJ,
1373        0,
1374        0,
1375        0,
1376        MatGetBlockSize_SeqBAIJ,
1377        MatGetRowIJ_SeqBAIJ,
1378        MatRestoreRowIJ_SeqBAIJ,
1379        0,
1380        0,
1381        0,
1382        0,
1383        0,
1384        0,
1385        MatSetValuesBlocked_SeqBAIJ,
1386        MatGetSubMatrix_SeqBAIJ,
1387        MatDestroy_SeqBAIJ,
1388        MatView_SeqBAIJ,
1389        MatGetPetscMaps_Petsc,
1390        0,
1391        0,
1392        0,
1393        0,
1394        0,
1395        0,
1396        MatGetRowMax_SeqBAIJ,
1397        MatConvert_Basic};
1398 
1399 EXTERN_C_BEGIN
1400 #undef __FUNCT__
1401 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1402 int MatStoreValues_SeqBAIJ(Mat mat)
1403 {
1404   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1405   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1406   int         ierr;
1407 
1408   PetscFunctionBegin;
1409   if (aij->nonew != 1) {
1410     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1411   }
1412 
1413   /* allocate space for values if not already there */
1414   if (!aij->saved_values) {
1415     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1416   }
1417 
1418   /* copy values over */
1419   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1420   PetscFunctionReturn(0);
1421 }
1422 EXTERN_C_END
1423 
1424 EXTERN_C_BEGIN
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1427 int MatRetrieveValues_SeqBAIJ(Mat mat)
1428 {
1429   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1430   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1431 
1432   PetscFunctionBegin;
1433   if (aij->nonew != 1) {
1434     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1435   }
1436   if (!aij->saved_values) {
1437     SETERRQ(1,"Must call MatStoreValues(A);first");
1438   }
1439 
1440   /* copy values over */
1441   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1442   PetscFunctionReturn(0);
1443 }
1444 EXTERN_C_END
1445 
1446 EXTERN_C_BEGIN
1447 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1448 EXTERN_C_END
1449 
1450 EXTERN_C_BEGIN
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatCreate_SeqBAIJ"
1453 int MatCreate_SeqBAIJ(Mat B)
1454 {
1455   int         ierr,size;
1456   Mat_SeqBAIJ *b;
1457 
1458   PetscFunctionBegin;
1459   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1460   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1461 
1462   B->m = B->M = PetscMax(B->m,B->M);
1463   B->n = B->N = PetscMax(B->n,B->N);
1464   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1465   B->data = (void*)b;
1466   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1467   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1468   B->factor           = 0;
1469   B->lupivotthreshold = 1.0;
1470   B->mapping          = 0;
1471   b->row              = 0;
1472   b->col              = 0;
1473   b->icol             = 0;
1474   b->reallocs         = 0;
1475   b->saved_values     = 0;
1476 #if defined(PETSC_USE_MAT_SINGLE)
1477   b->setvalueslen     = 0;
1478   b->setvaluescopy    = PETSC_NULL;
1479 #endif
1480   b->single_precision_solves = PETSC_FALSE;
1481 
1482   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1483   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1484 
1485   b->sorted           = PETSC_FALSE;
1486   b->roworiented      = PETSC_TRUE;
1487   b->nonew            = 0;
1488   b->diag             = 0;
1489   b->solve_work       = 0;
1490   b->mult_work        = 0;
1491   b->spptr            = 0;
1492   B->info.nz_unneeded = (PetscReal)b->maxnz;
1493   b->keepzeroedrows   = PETSC_FALSE;
1494 
1495   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1496                                      "MatStoreValues_SeqBAIJ",
1497                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1498   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1499                                      "MatRetrieveValues_SeqBAIJ",
1500                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1501   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1502                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1503                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1504   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1505                                      "MatConvert_SeqBAIJ_SeqAIJ",
1506                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1507   PetscFunctionReturn(0);
1508 }
1509 EXTERN_C_END
1510 
1511 #undef __FUNCT__
1512 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1513 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1514 {
1515   Mat         C;
1516   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1517   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1518 
1519   PetscFunctionBegin;
1520   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1521 
1522   *B = 0;
1523   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1524   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1525   c    = (Mat_SeqBAIJ*)C->data;
1526 
1527   c->bs         = a->bs;
1528   c->bs2        = a->bs2;
1529   c->mbs        = a->mbs;
1530   c->nbs        = a->nbs;
1531   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1532 
1533   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1534   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1535   for (i=0; i<mbs; i++) {
1536     c->imax[i] = a->imax[i];
1537     c->ilen[i] = a->ilen[i];
1538   }
1539 
1540   /* allocate the matrix space */
1541   c->singlemalloc = PETSC_TRUE;
1542   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1543   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1544   c->j = (int*)(c->a + nz*bs2);
1545   c->i = c->j + nz;
1546   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1547   if (mbs > 0) {
1548     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1549     if (cpvalues == MAT_COPY_VALUES) {
1550       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1551     } else {
1552       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1553     }
1554   }
1555 
1556   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1557   c->sorted      = a->sorted;
1558   c->roworiented = a->roworiented;
1559   c->nonew       = a->nonew;
1560 
1561   if (a->diag) {
1562     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1563     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1564     for (i=0; i<mbs; i++) {
1565       c->diag[i] = a->diag[i];
1566     }
1567   } else c->diag        = 0;
1568   c->nz                 = a->nz;
1569   c->maxnz              = a->maxnz;
1570   c->solve_work         = 0;
1571   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1572   c->mult_work          = 0;
1573   C->preallocated       = PETSC_TRUE;
1574   C->assembled          = PETSC_TRUE;
1575   *B = C;
1576   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 EXTERN_C_BEGIN
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatLoad_SeqBAIJ"
1583 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1584 {
1585   Mat_SeqBAIJ  *a;
1586   Mat          B;
1587   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1588   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1589   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1590   int          *masked,nmask,tmp,bs2,ishift;
1591   PetscScalar  *aa;
1592   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1593 
1594   PetscFunctionBegin;
1595   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1596   bs2  = bs*bs;
1597 
1598   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1599   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1600   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1601   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1602   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1603   M = header[1]; N = header[2]; nz = header[3];
1604 
1605   if (header[3] < 0) {
1606     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1607   }
1608 
1609   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1610 
1611   /*
1612      This code adds extra rows to make sure the number of rows is
1613     divisible by the blocksize
1614   */
1615   mbs        = M/bs;
1616   extra_rows = bs - M + bs*(mbs);
1617   if (extra_rows == bs) extra_rows = 0;
1618   else                  mbs++;
1619   if (extra_rows) {
1620     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1621   }
1622 
1623   /* read in row lengths */
1624   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1625   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1626   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1627 
1628   /* read in column indices */
1629   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1630   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1631   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1632 
1633   /* loop over row lengths determining block row lengths */
1634   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1635   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1636   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1637   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1638   masked   = mask + mbs;
1639   rowcount = 0; nzcount = 0;
1640   for (i=0; i<mbs; i++) {
1641     nmask = 0;
1642     for (j=0; j<bs; j++) {
1643       kmax = rowlengths[rowcount];
1644       for (k=0; k<kmax; k++) {
1645         tmp = jj[nzcount++]/bs;
1646         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1647       }
1648       rowcount++;
1649     }
1650     browlengths[i] += nmask;
1651     /* zero out the mask elements we set */
1652     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1653   }
1654 
1655   /* create our matrix */
1656   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1657   B = *A;
1658   a = (Mat_SeqBAIJ*)B->data;
1659 
1660   /* set matrix "i" values */
1661   a->i[0] = 0;
1662   for (i=1; i<= mbs; i++) {
1663     a->i[i]      = a->i[i-1] + browlengths[i-1];
1664     a->ilen[i-1] = browlengths[i-1];
1665   }
1666   a->nz         = 0;
1667   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1668 
1669   /* read in nonzero values */
1670   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1671   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1672   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1673 
1674   /* set "a" and "j" values into matrix */
1675   nzcount = 0; jcount = 0;
1676   for (i=0; i<mbs; i++) {
1677     nzcountb = nzcount;
1678     nmask    = 0;
1679     for (j=0; j<bs; j++) {
1680       kmax = rowlengths[i*bs+j];
1681       for (k=0; k<kmax; k++) {
1682         tmp = jj[nzcount++]/bs;
1683 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1684       }
1685     }
1686     /* sort the masked values */
1687     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1688 
1689     /* set "j" values into matrix */
1690     maskcount = 1;
1691     for (j=0; j<nmask; j++) {
1692       a->j[jcount++]  = masked[j];
1693       mask[masked[j]] = maskcount++;
1694     }
1695     /* set "a" values into matrix */
1696     ishift = bs2*a->i[i];
1697     for (j=0; j<bs; j++) {
1698       kmax = rowlengths[i*bs+j];
1699       for (k=0; k<kmax; k++) {
1700         tmp       = jj[nzcountb]/bs ;
1701         block     = mask[tmp] - 1;
1702         point     = jj[nzcountb] - bs*tmp;
1703         idx       = ishift + bs2*block + j + bs*point;
1704         a->a[idx] = (MatScalar)aa[nzcountb++];
1705       }
1706     }
1707     /* zero out the mask elements we set */
1708     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1709   }
1710   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1711 
1712   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1713   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1714   ierr = PetscFree(aa);CHKERRQ(ierr);
1715   ierr = PetscFree(jj);CHKERRQ(ierr);
1716   ierr = PetscFree(mask);CHKERRQ(ierr);
1717 
1718   B->assembled = PETSC_TRUE;
1719 
1720   ierr = MatView_Private(B);CHKERRQ(ierr);
1721   PetscFunctionReturn(0);
1722 }
1723 EXTERN_C_END
1724 
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatCreateSeqBAIJ"
1727 /*@C
1728    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1729    compressed row) format.  For good matrix assembly performance the
1730    user should preallocate the matrix storage by setting the parameter nz
1731    (or the array nnz).  By setting these parameters accurately, performance
1732    during matrix assembly can be increased by more than a factor of 50.
1733 
1734    Collective on MPI_Comm
1735 
1736    Input Parameters:
1737 +  comm - MPI communicator, set to PETSC_COMM_SELF
1738 .  bs - size of block
1739 .  m - number of rows
1740 .  n - number of columns
1741 .  nz - number of nonzero blocks  per block row (same for all rows)
1742 -  nnz - array containing the number of nonzero blocks in the various block rows
1743          (possibly different for each block row) or PETSC_NULL
1744 
1745    Output Parameter:
1746 .  A - the matrix
1747 
1748    Options Database Keys:
1749 .   -mat_no_unroll - uses code that does not unroll the loops in the
1750                      block calculations (much slower)
1751 .    -mat_block_size - size of the blocks to use
1752 
1753    Level: intermediate
1754 
1755    Notes:
1756    A nonzero block is any block that as 1 or more nonzeros in it
1757 
1758    The block AIJ format is fully compatible with standard Fortran 77
1759    storage.  That is, the stored row and column indices can begin at
1760    either one (as in Fortran) or zero.  See the users' manual for details.
1761 
1762    Specify the preallocated storage with either nz or nnz (not both).
1763    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1764    allocation.  For additional details, see the users manual chapter on
1765    matrices.
1766 
1767 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1768 @*/
1769 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1770 {
1771   int ierr;
1772 
1773   PetscFunctionBegin;
1774   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1775   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
1776   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
1782 /*@C
1783    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1784    per row in the matrix. For good matrix assembly performance the
1785    user should preallocate the matrix storage by setting the parameter nz
1786    (or the array nnz).  By setting these parameters accurately, performance
1787    during matrix assembly can be increased by more than a factor of 50.
1788 
1789    Collective on MPI_Comm
1790 
1791    Input Parameters:
1792 +  A - the matrix
1793 .  bs - size of block
1794 .  nz - number of block nonzeros per block row (same for all rows)
1795 -  nnz - array containing the number of block nonzeros in the various block rows
1796          (possibly different for each block row) or PETSC_NULL
1797 
1798    Options Database Keys:
1799 .   -mat_no_unroll - uses code that does not unroll the loops in the
1800                      block calculations (much slower)
1801 .    -mat_block_size - size of the blocks to use
1802 
1803    Level: intermediate
1804 
1805    Notes:
1806    The block AIJ format is fully compatible with standard Fortran 77
1807    storage.  That is, the stored row and column indices can begin at
1808    either one (as in Fortran) or zero.  See the users' manual for details.
1809 
1810    Specify the preallocated storage with either nz or nnz (not both).
1811    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1812    allocation.  For additional details, see the users manual chapter on
1813    matrices.
1814 
1815 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1816 @*/
1817 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1818 {
1819   Mat_SeqBAIJ *b;
1820   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1821   PetscTruth  flg;
1822 
1823   PetscFunctionBegin;
1824   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr);
1825   if (!flg) PetscFunctionReturn(0);
1826 
1827   B->preallocated = PETSC_TRUE;
1828   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1829   if (nnz && newbs != bs) {
1830     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1831   }
1832   bs   = newbs;
1833 
1834   mbs  = B->m/bs;
1835   nbs  = B->n/bs;
1836   bs2  = bs*bs;
1837 
1838   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1839     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1840   }
1841 
1842   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1843   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1844   if (nnz) {
1845     for (i=0; i<mbs; i++) {
1846       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1847       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1848     }
1849   }
1850 
1851   b       = (Mat_SeqBAIJ*)B->data;
1852   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1853   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1854   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1855   if (!flg) {
1856     switch (bs) {
1857     case 1:
1858       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1859       B->ops->mult            = MatMult_SeqBAIJ_1;
1860       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1861       break;
1862     case 2:
1863       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1864       B->ops->mult            = MatMult_SeqBAIJ_2;
1865       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1866       break;
1867     case 3:
1868       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1869       B->ops->mult            = MatMult_SeqBAIJ_3;
1870       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1871       break;
1872     case 4:
1873       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1874       B->ops->mult            = MatMult_SeqBAIJ_4;
1875       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1876       break;
1877     case 5:
1878       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1879       B->ops->mult            = MatMult_SeqBAIJ_5;
1880       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1881       break;
1882     case 6:
1883       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1884       B->ops->mult            = MatMult_SeqBAIJ_6;
1885       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1886       break;
1887     case 7:
1888       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1889       B->ops->mult            = MatMult_SeqBAIJ_7;
1890       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1891       break;
1892     default:
1893       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1894       B->ops->mult            = MatMult_SeqBAIJ_N;
1895       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1896       break;
1897     }
1898   }
1899   b->bs      = bs;
1900   b->mbs     = mbs;
1901   b->nbs     = nbs;
1902   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1903   if (!nnz) {
1904     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1905     else if (nz <= 0)        nz = 1;
1906     for (i=0; i<mbs; i++) b->imax[i] = nz;
1907     nz = nz*mbs;
1908   } else {
1909     nz = 0;
1910     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1911   }
1912 
1913   /* allocate the matrix space */
1914   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1915   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1916   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1917   b->j  = (int*)(b->a + nz*bs2);
1918   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1919   b->i  = b->j + nz;
1920   b->singlemalloc = PETSC_TRUE;
1921 
1922   b->i[0] = 0;
1923   for (i=1; i<mbs+1; i++) {
1924     b->i[i] = b->i[i-1] + b->imax[i-1];
1925   }
1926 
1927   /* b->ilen will count nonzeros in each block row so far. */
1928   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1929   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1930   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1931 
1932   b->bs               = bs;
1933   b->bs2              = bs2;
1934   b->mbs              = mbs;
1935   b->nz               = 0;
1936   b->maxnz            = nz*bs2;
1937   B->info.nz_unneeded = (PetscReal)b->maxnz;
1938   PetscFunctionReturn(0);
1939 }
1940 
1941