xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 3477af423b0b44c4fccc752142a921a634b9177e)
1 /*$Id: baij.c,v 1.231 2001/07/11 04:27:45 buschelm Exp buschelm $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "petscsys.h"                     /*I "petscmat.h" I*/
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 
12 /*  UGLY, ugly, ugly
13    When MatScalar == Scalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
14    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
15    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
16    converts the entries into single precision and then calls ..._MatScalar() to put them
17    into the single precision data structures.
18 */
19 #if defined(PETSC_USE_MAT_SINGLE)
20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
21 #else
22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
23 #endif
24 
25 #define CHUNKSIZE  10
26 
27 /*
28      Checks for missing diagonals
29 */
30 #undef __FUNCT__
31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
32 int MatMissingDiagonal_SeqBAIJ(Mat A)
33 {
34   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
35   int         *diag,*jj = a->j,i,ierr;
36 
37   PetscFunctionBegin;
38   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
39   diag = a->diag;
40   for (i=0; i<a->mbs; i++) {
41     if (jj[diag[i]] != i) {
42       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
43     }
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
50 int MatMarkDiagonal_SeqBAIJ(Mat A)
51 {
52   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
53   int         i,j,*diag,m = a->mbs,ierr;
54 
55   PetscFunctionBegin;
56   if (a->diag) PetscFunctionReturn(0);
57 
58   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
59   PetscLogObjectMemory(A,(m+1)*sizeof(int));
60   for (i=0; i<m; i++) {
61     diag[i] = a->i[i+1];
62     for (j=a->i[i]; j<a->i[i+1]; j++) {
63       if (a->j[j] == i) {
64         diag[i] = j;
65         break;
66       }
67     }
68   }
69   a->diag = diag;
70   PetscFunctionReturn(0);
71 }
72 
73 
74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
79 {
80   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
81   int         ierr,n = a->mbs,i;
82 
83   PetscFunctionBegin;
84   *nn = n;
85   if (!ia) PetscFunctionReturn(0);
86   if (symmetric) {
87     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
88   } else if (oshift == 1) {
89     /* temporarily add 1 to i and j indices */
90     int nz = a->i[n];
91     for (i=0; i<nz; i++) a->j[i]++;
92     for (i=0; i<n+1; i++) a->i[i]++;
93     *ia = a->i; *ja = a->j;
94   } else {
95     *ia = a->i; *ja = a->j;
96   }
97 
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
104 {
105   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
106   int         i,n = a->mbs,ierr;
107 
108   PetscFunctionBegin;
109   if (!ia) PetscFunctionReturn(0);
110   if (symmetric) {
111     ierr = PetscFree(*ia);CHKERRQ(ierr);
112     ierr = PetscFree(*ja);CHKERRQ(ierr);
113   } else if (oshift == 1) {
114     int nz = a->i[n]-1;
115     for (i=0; i<nz; i++) a->j[i]--;
116     for (i=0; i<n+1; i++) a->i[i]--;
117   }
118   PetscFunctionReturn(0);
119 }
120 
121 #undef __FUNCT__
122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
124 {
125   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
126 
127   PetscFunctionBegin;
128   *bs = baij->bs;
129   PetscFunctionReturn(0);
130 }
131 
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatDestroy_SeqBAIJ"
135 int MatDestroy_SeqBAIJ(Mat A)
136 {
137   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
138   int         ierr;
139 
140   PetscFunctionBegin;
141 #if defined(PETSC_USE_LOG)
142   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
143 #endif
144   ierr = PetscFree(a->a);CHKERRQ(ierr);
145   if (!a->singlemalloc) {
146     ierr = PetscFree(a->i);CHKERRQ(ierr);
147     ierr = PetscFree(a->j);CHKERRQ(ierr);
148   }
149   if (a->row) {
150     ierr = ISDestroy(a->row);CHKERRQ(ierr);
151   }
152   if (a->col) {
153     ierr = ISDestroy(a->col);CHKERRQ(ierr);
154   }
155   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
156   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
157   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
158   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
159   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
160   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
161   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
162 #if defined(PETSC_USE_MAT_SINGLE)
163   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
164 #endif
165   ierr = PetscFree(a);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatSetOption_SeqBAIJ"
171 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
172 {
173   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
174 
175   PetscFunctionBegin;
176   switch (op) {
177   case MAT_ROW_ORIENTED:
178     a->roworiented    = PETSC_TRUE;
179     break;
180   case MAT_COLUMN_ORIENTED:
181     a->roworiented    = PETSC_FALSE;
182     break;
183   case MAT_COLUMNS_SORTED:
184     a->sorted         = PETSC_TRUE;
185     break;
186   case MAT_COLUMNS_UNSORTED:
187     a->sorted         = PETSC_FALSE;
188     break;
189   case MAT_KEEP_ZEROED_ROWS:
190     a->keepzeroedrows = PETSC_TRUE;
191     break;
192   case MAT_NO_NEW_NONZERO_LOCATIONS:
193     a->nonew          = 1;
194     break;
195   case MAT_NEW_NONZERO_LOCATION_ERR:
196     a->nonew          = -1;
197     break;
198   case MAT_NEW_NONZERO_ALLOCATION_ERR:
199     a->nonew          = -2;
200     break;
201   case MAT_YES_NEW_NONZERO_LOCATIONS:
202     a->nonew          = 0;
203     break;
204   case MAT_ROWS_SORTED:
205   case MAT_ROWS_UNSORTED:
206   case MAT_YES_NEW_DIAGONALS:
207   case MAT_IGNORE_OFF_PROC_ENTRIES:
208   case MAT_USE_HASH_TABLE:
209     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
210     break;
211   case MAT_NO_NEW_DIAGONALS:
212     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
213     break;
214   case MAT_USE_SINGLE_PRECISION_SOLVES:
215 #if defined(PETSC_USE_MAT_SINGLE)
216     if (a->bs==4) {
217       PetscTruth sse_enabled;
218       int ierr;
219       ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr);
220       if (sse_enabled) {
221         a->singleprecisionsolves = PETSC_TRUE;
222       } else {
223         PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
224       }
225     } else {
226       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
227     }
228 #else
229     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
230 #endif
231     break;
232   default:
233     SETERRQ(PETSC_ERR_SUP,"unknown option");
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ"
240 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
241 {
242   PetscFunctionBegin;
243   if (m) *m = 0;
244   if (n) *n = A->m;
245   PetscFunctionReturn(0);
246 }
247 
248 #undef __FUNCT__
249 #define __FUNCT__ "MatGetRow_SeqBAIJ"
250 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
251 {
252   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
253   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
254   MatScalar    *aa,*aa_i;
255   Scalar       *v_i;
256 
257   PetscFunctionBegin;
258   bs  = a->bs;
259   ai  = a->i;
260   aj  = a->j;
261   aa  = a->a;
262   bs2 = a->bs2;
263 
264   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
265 
266   bn  = row/bs;   /* Block number */
267   bp  = row % bs; /* Block Position */
268   M   = ai[bn+1] - ai[bn];
269   *nz = bs*M;
270 
271   if (v) {
272     *v = 0;
273     if (*nz) {
274       ierr = PetscMalloc((*nz)*sizeof(Scalar),v);CHKERRQ(ierr);
275       for (i=0; i<M; i++) { /* for each block in the block row */
276         v_i  = *v + i*bs;
277         aa_i = aa + bs2*(ai[bn] + i);
278         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
279       }
280     }
281   }
282 
283   if (idx) {
284     *idx = 0;
285     if (*nz) {
286       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
287       for (i=0; i<M; i++) { /* for each block in the block row */
288         idx_i = *idx + i*bs;
289         itmp  = bs*aj[ai[bn] + i];
290         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
291       }
292     }
293   }
294   PetscFunctionReturn(0);
295 }
296 
297 #undef __FUNCT__
298 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
299 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
300 {
301   int ierr;
302 
303   PetscFunctionBegin;
304   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
305   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatTranspose_SeqBAIJ"
311 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
312 {
313   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
314   Mat         C;
315   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
316   int         *rows,*cols,bs2=a->bs2;
317   Scalar      *array;
318 
319   PetscFunctionBegin;
320   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
321   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
322   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
323 
324 #if defined(PETSC_USE_MAT_SINGLE)
325   ierr = PetscMalloc(a->bs2*a->nz*sizeof(Scalar),&array);CHKERRQ(ierr);
326   for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i];
327 #else
328   array = a->a;
329 #endif
330 
331   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
332   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
333   ierr = PetscFree(col);CHKERRQ(ierr);
334   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
335   cols = rows + bs;
336   for (i=0; i<mbs; i++) {
337     cols[0] = i*bs;
338     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
339     len = ai[i+1] - ai[i];
340     for (j=0; j<len; j++) {
341       rows[0] = (*aj++)*bs;
342       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
343       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
344       array += bs2;
345     }
346   }
347   ierr = PetscFree(rows);CHKERRQ(ierr);
348 #if defined(PETSC_USE_MAT_SINGLE)
349   ierr = PetscFree(array);
350 #endif
351 
352   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
353   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
354 
355   if (B) {
356     *B = C;
357   } else {
358     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
359   }
360   PetscFunctionReturn(0);
361 }
362 
363 #undef __FUNCT__
364 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
365 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
366 {
367   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
368   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
369   Scalar      *aa;
370   FILE        *file;
371 
372   PetscFunctionBegin;
373   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
374   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
375   col_lens[0] = MAT_COOKIE;
376 
377   col_lens[1] = A->m;
378   col_lens[2] = A->n;
379   col_lens[3] = a->nz*bs2;
380 
381   /* store lengths of each row and write (including header) to file */
382   count = 0;
383   for (i=0; i<a->mbs; i++) {
384     for (j=0; j<bs; j++) {
385       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
386     }
387   }
388   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
389   ierr = PetscFree(col_lens);CHKERRQ(ierr);
390 
391   /* store column indices (zero start index) */
392   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
393   count = 0;
394   for (i=0; i<a->mbs; i++) {
395     for (j=0; j<bs; j++) {
396       for (k=a->i[i]; k<a->i[i+1]; k++) {
397         for (l=0; l<bs; l++) {
398           jj[count++] = bs*a->j[k] + l;
399         }
400       }
401     }
402   }
403   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
404   ierr = PetscFree(jj);CHKERRQ(ierr);
405 
406   /* store nonzero values */
407   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr);
408   count = 0;
409   for (i=0; i<a->mbs; i++) {
410     for (j=0; j<bs; j++) {
411       for (k=a->i[i]; k<a->i[i+1]; k++) {
412         for (l=0; l<bs; l++) {
413           aa[count++] = a->a[bs2*k + l*bs + j];
414         }
415       }
416     }
417   }
418   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
419   ierr = PetscFree(aa);CHKERRQ(ierr);
420 
421   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
422   if (file) {
423     fprintf(file,"-matload_block_size %d\n",a->bs);
424   }
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
430 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
431 {
432   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
433   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
434   PetscViewerFormat format;
435 
436   PetscFunctionBegin;
437   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
438   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
439     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
440   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
441     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
442   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
443     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
444     for (i=0; i<a->mbs; i++) {
445       for (j=0; j<bs; j++) {
446         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
447         for (k=a->i[i]; k<a->i[i+1]; k++) {
448           for (l=0; l<bs; l++) {
449 #if defined(PETSC_USE_COMPLEX)
450             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
451               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l,
452                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
453             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
454               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l,
455                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
456             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
457               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
458             }
459 #else
460             if (a->a[bs2*k + l*bs + j] != 0.0) {
461               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
462             }
463 #endif
464           }
465         }
466         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
467       }
468     }
469     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
470   } else {
471     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
472     for (i=0; i<a->mbs; i++) {
473       for (j=0; j<bs; j++) {
474         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
475         for (k=a->i[i]; k<a->i[i+1]; k++) {
476           for (l=0; l<bs; l++) {
477 #if defined(PETSC_USE_COMPLEX)
478             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
479               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
480                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
481             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
482               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
483                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
484             } else {
485               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
486             }
487 #else
488             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
489 #endif
490           }
491         }
492         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
493       }
494     }
495     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
496   }
497   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
498   PetscFunctionReturn(0);
499 }
500 
501 #undef __FUNCT__
502 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
503 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
504 {
505   Mat          A = (Mat) Aa;
506   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
507   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
508   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
509   MatScalar    *aa;
510   PetscViewer  viewer;
511 
512   PetscFunctionBegin;
513 
514   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
515   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
516 
517   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
518 
519   /* loop over matrix elements drawing boxes */
520   color = PETSC_DRAW_BLUE;
521   for (i=0,row=0; i<mbs; i++,row+=bs) {
522     for (j=a->i[i]; j<a->i[i+1]; j++) {
523       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
524       x_l = a->j[j]*bs; x_r = x_l + 1.0;
525       aa = a->a + j*bs2;
526       for (k=0; k<bs; k++) {
527         for (l=0; l<bs; l++) {
528           if (PetscRealPart(*aa++) >=  0.) continue;
529           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
530         }
531       }
532     }
533   }
534   color = PETSC_DRAW_CYAN;
535   for (i=0,row=0; i<mbs; i++,row+=bs) {
536     for (j=a->i[i]; j<a->i[i+1]; j++) {
537       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
538       x_l = a->j[j]*bs; x_r = x_l + 1.0;
539       aa = a->a + j*bs2;
540       for (k=0; k<bs; k++) {
541         for (l=0; l<bs; l++) {
542           if (PetscRealPart(*aa++) != 0.) continue;
543           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
544         }
545       }
546     }
547   }
548 
549   color = PETSC_DRAW_RED;
550   for (i=0,row=0; i<mbs; i++,row+=bs) {
551     for (j=a->i[i]; j<a->i[i+1]; j++) {
552       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
553       x_l = a->j[j]*bs; x_r = x_l + 1.0;
554       aa = a->a + j*bs2;
555       for (k=0; k<bs; k++) {
556         for (l=0; l<bs; l++) {
557           if (PetscRealPart(*aa++) <= 0.) continue;
558           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
559         }
560       }
561     }
562   }
563   PetscFunctionReturn(0);
564 }
565 
566 #undef __FUNCT__
567 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
568 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
569 {
570   int          ierr;
571   PetscReal    xl,yl,xr,yr,w,h;
572   PetscDraw    draw;
573   PetscTruth   isnull;
574 
575   PetscFunctionBegin;
576 
577   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
578   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
579 
580   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
581   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
582   xr += w;    yr += h;  xl = -w;     yl = -h;
583   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
584   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
585   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatView_SeqBAIJ"
591 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
592 {
593   int        ierr;
594   PetscTruth issocket,isascii,isbinary,isdraw;
595 
596   PetscFunctionBegin;
597   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
598   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
599   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
600   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
601   if (issocket) {
602     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
603   } else if (isascii){
604     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
605   } else if (isbinary) {
606     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
607   } else if (isdraw) {
608     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
609   } else {
610     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
611   }
612   PetscFunctionReturn(0);
613 }
614 
615 
616 #undef __FUNCT__
617 #define __FUNCT__ "MatGetValues_SeqBAIJ"
618 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
619 {
620   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
621   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
622   int        *ai = a->i,*ailen = a->ilen;
623   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
624   MatScalar  *ap,*aa = a->a,zero = 0.0;
625 
626   PetscFunctionBegin;
627   for (k=0; k<m; k++) { /* loop over rows */
628     row  = im[k]; brow = row/bs;
629     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
630     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
631     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
632     nrow = ailen[brow];
633     for (l=0; l<n; l++) { /* loop over columns */
634       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
635       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
636       col  = in[l] ;
637       bcol = col/bs;
638       cidx = col%bs;
639       ridx = row%bs;
640       high = nrow;
641       low  = 0; /* assume unsorted */
642       while (high-low > 5) {
643         t = (low+high)/2;
644         if (rp[t] > bcol) high = t;
645         else             low  = t;
646       }
647       for (i=low; i<high; i++) {
648         if (rp[i] > bcol) break;
649         if (rp[i] == bcol) {
650           *v++ = ap[bs2*i+bs*cidx+ridx];
651           goto finished;
652         }
653       }
654       *v++ = zero;
655       finished:;
656     }
657   }
658   PetscFunctionReturn(0);
659 }
660 
661 #if defined(PETSC_USE_MAT_SINGLE)
662 #undef __FUNCT__
663 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
664 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
665 {
666   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
667   int         ierr,i,N = m*n*b->bs2;
668   MatScalar   *vsingle;
669 
670   PetscFunctionBegin;
671   if (N > b->setvalueslen) {
672     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
673     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
674     b->setvalueslen  = N;
675   }
676   vsingle = b->setvaluescopy;
677   for (i=0; i<N; i++) {
678     vsingle[i] = v[i];
679   }
680   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
681   PetscFunctionReturn(0);
682 }
683 #endif
684 
685 
686 #undef __FUNCT__
687 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
688 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
689 {
690   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
691   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
692   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
693   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
694   PetscTruth  roworiented=a->roworiented;
695   MatScalar   *value = v,*ap,*aa = a->a,*bap;
696 
697   PetscFunctionBegin;
698   if (roworiented) {
699     stepval = (n-1)*bs;
700   } else {
701     stepval = (m-1)*bs;
702   }
703   for (k=0; k<m; k++) { /* loop over added rows */
704     row  = im[k];
705     if (row < 0) continue;
706 #if defined(PETSC_USE_BOPT_g)
707     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
708 #endif
709     rp   = aj + ai[row];
710     ap   = aa + bs2*ai[row];
711     rmax = imax[row];
712     nrow = ailen[row];
713     low  = 0;
714     for (l=0; l<n; l++) { /* loop over added columns */
715       if (in[l] < 0) continue;
716 #if defined(PETSC_USE_BOPT_g)
717       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
718 #endif
719       col = in[l];
720       if (roworiented) {
721         value = v + k*(stepval+bs)*bs + l*bs;
722       } else {
723         value = v + l*(stepval+bs)*bs + k*bs;
724       }
725       if (!sorted) low = 0; high = nrow;
726       while (high-low > 7) {
727         t = (low+high)/2;
728         if (rp[t] > col) high = t;
729         else             low  = t;
730       }
731       for (i=low; i<high; i++) {
732         if (rp[i] > col) break;
733         if (rp[i] == col) {
734           bap  = ap +  bs2*i;
735           if (roworiented) {
736             if (is == ADD_VALUES) {
737               for (ii=0; ii<bs; ii++,value+=stepval) {
738                 for (jj=ii; jj<bs2; jj+=bs) {
739                   bap[jj] += *value++;
740                 }
741               }
742             } else {
743               for (ii=0; ii<bs; ii++,value+=stepval) {
744                 for (jj=ii; jj<bs2; jj+=bs) {
745                   bap[jj] = *value++;
746                 }
747               }
748             }
749           } else {
750             if (is == ADD_VALUES) {
751               for (ii=0; ii<bs; ii++,value+=stepval) {
752                 for (jj=0; jj<bs; jj++) {
753                   *bap++ += *value++;
754                 }
755               }
756             } else {
757               for (ii=0; ii<bs; ii++,value+=stepval) {
758                 for (jj=0; jj<bs; jj++) {
759                   *bap++  = *value++;
760                 }
761               }
762             }
763           }
764           goto noinsert2;
765         }
766       }
767       if (nonew == 1) goto noinsert2;
768       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
769       if (nrow >= rmax) {
770         /* there is no extra room in row, therefore enlarge */
771         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
772         MatScalar *new_a;
773 
774         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
775 
776         /* malloc new storage space */
777         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
778 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
779         new_j   = (int*)(new_a + bs2*new_nz);
780         new_i   = new_j + new_nz;
781 
782         /* copy over old data into new slots */
783         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
784         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
785         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
786         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
787         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
788         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
789         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
790         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
791         /* free up old matrix storage */
792         ierr = PetscFree(a->a);CHKERRQ(ierr);
793         if (!a->singlemalloc) {
794           ierr = PetscFree(a->i);CHKERRQ(ierr);
795           ierr = PetscFree(a->j);CHKERRQ(ierr);
796         }
797         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
798         a->singlemalloc = PETSC_TRUE;
799 
800         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
801         rmax = imax[row] = imax[row] + CHUNKSIZE;
802         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
803         a->maxnz += bs2*CHUNKSIZE;
804         a->reallocs++;
805         a->nz++;
806       }
807       N = nrow++ - 1;
808       /* shift up all the later entries in this row */
809       for (ii=N; ii>=i; ii--) {
810         rp[ii+1] = rp[ii];
811         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
812       }
813       if (N >= i) {
814         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
815       }
816       rp[i] = col;
817       bap   = ap +  bs2*i;
818       if (roworiented) {
819         for (ii=0; ii<bs; ii++,value+=stepval) {
820           for (jj=ii; jj<bs2; jj+=bs) {
821             bap[jj] = *value++;
822           }
823         }
824       } else {
825         for (ii=0; ii<bs; ii++,value+=stepval) {
826           for (jj=0; jj<bs; jj++) {
827             *bap++  = *value++;
828           }
829         }
830       }
831       noinsert2:;
832       low = i;
833     }
834     ailen[row] = nrow;
835   }
836   PetscFunctionReturn(0);
837 }
838 
839 
840 #undef __FUNCT__
841 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
842 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
843 {
844   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
845   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
846   int        m = A->m,*ip,N,*ailen = a->ilen;
847   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
848   MatScalar  *aa = a->a,*ap;
849 
850   PetscFunctionBegin;
851   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
852 
853   if (m) rmax = ailen[0];
854   for (i=1; i<mbs; i++) {
855     /* move each row back by the amount of empty slots (fshift) before it*/
856     fshift += imax[i-1] - ailen[i-1];
857     rmax   = PetscMax(rmax,ailen[i]);
858     if (fshift) {
859       ip = aj + ai[i]; ap = aa + bs2*ai[i];
860       N = ailen[i];
861       for (j=0; j<N; j++) {
862         ip[j-fshift] = ip[j];
863         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
864       }
865     }
866     ai[i] = ai[i-1] + ailen[i-1];
867   }
868   if (mbs) {
869     fshift += imax[mbs-1] - ailen[mbs-1];
870     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
871   }
872   /* reset ilen and imax for each row */
873   for (i=0; i<mbs; i++) {
874     ailen[i] = imax[i] = ai[i+1] - ai[i];
875   }
876   a->nz = ai[mbs];
877 
878   /* diagonals may have moved, so kill the diagonal pointers */
879   if (fshift && a->diag) {
880     ierr = PetscFree(a->diag);CHKERRQ(ierr);
881     PetscLogObjectMemory(A,-(m+1)*sizeof(int));
882     a->diag = 0;
883   }
884   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
885            m,A->n,a->bs,fshift*bs2,a->nz*bs2);
886   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
887            a->reallocs);
888   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
889   a->reallocs          = 0;
890   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
891 
892   PetscFunctionReturn(0);
893 }
894 
895 
896 
897 /*
898    This function returns an array of flags which indicate the locations of contiguous
899    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
900    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
901    Assume: sizes should be long enough to hold all the values.
902 */
903 #undef __FUNCT__
904 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
905 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
906 {
907   int        i,j,k,row;
908   PetscTruth flg;
909 
910   PetscFunctionBegin;
911   for (i=0,j=0; i<n; j++) {
912     row = idx[i];
913     if (row%bs!=0) { /* Not the begining of a block */
914       sizes[j] = 1;
915       i++;
916     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
917       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
918       i++;
919     } else { /* Begining of the block, so check if the complete block exists */
920       flg = PETSC_TRUE;
921       for (k=1; k<bs; k++) {
922         if (row+k != idx[i+k]) { /* break in the block */
923           flg = PETSC_FALSE;
924           break;
925         }
926       }
927       if (flg == PETSC_TRUE) { /* No break in the bs */
928         sizes[j] = bs;
929         i+= bs;
930       } else {
931         sizes[j] = 1;
932         i++;
933       }
934     }
935   }
936   *bs_max = j;
937   PetscFunctionReturn(0);
938 }
939 
940 #undef __FUNCT__
941 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
942 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag)
943 {
944   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
945   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
946   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
947   Scalar      zero = 0.0;
948   MatScalar   *aa;
949 
950   PetscFunctionBegin;
951   /* Make a copy of the IS and  sort it */
952   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
953   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
954 
955   /* allocate memory for rows,sizes */
956   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
957   sizes = rows + is_n;
958 
959   /* copy IS values to rows, and sort them */
960   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
961   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
962   if (baij->keepzeroedrows) {
963     for (i=0; i<is_n; i++) { sizes[i] = 1; }
964     bs_max = is_n;
965   } else {
966     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
967   }
968   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
969 
970   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
971     row   = rows[j];
972     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
973     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
974     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
975     if (sizes[i] == bs && !baij->keepzeroedrows) {
976       if (diag) {
977         if (baij->ilen[row/bs] > 0) {
978           baij->ilen[row/bs]       = 1;
979           baij->j[baij->i[row/bs]] = row/bs;
980           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
981         }
982         /* Now insert all the diagonal values for this bs */
983         for (k=0; k<bs; k++) {
984           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
985         }
986       } else { /* (!diag) */
987         baij->ilen[row/bs] = 0;
988       } /* end (!diag) */
989     } else { /* (sizes[i] != bs) */
990 #if defined (PETSC_USE_DEBUG)
991       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
992 #endif
993       for (k=0; k<count; k++) {
994         aa[0] =  zero;
995         aa    += bs;
996       }
997       if (diag) {
998         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
999       }
1000     }
1001   }
1002 
1003   ierr = PetscFree(rows);CHKERRQ(ierr);
1004   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 #undef __FUNCT__
1009 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1010 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
1011 {
1012   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1013   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1014   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1015   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1016   int         ridx,cidx,bs2=a->bs2,ierr;
1017   PetscTruth  roworiented=a->roworiented;
1018   MatScalar   *ap,value,*aa=a->a,*bap;
1019 
1020   PetscFunctionBegin;
1021   for (k=0; k<m; k++) { /* loop over added rows */
1022     row  = im[k]; brow = row/bs;
1023     if (row < 0) continue;
1024 #if defined(PETSC_USE_BOPT_g)
1025     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1026 #endif
1027     rp   = aj + ai[brow];
1028     ap   = aa + bs2*ai[brow];
1029     rmax = imax[brow];
1030     nrow = ailen[brow];
1031     low  = 0;
1032     for (l=0; l<n; l++) { /* loop over added columns */
1033       if (in[l] < 0) continue;
1034 #if defined(PETSC_USE_BOPT_g)
1035       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1036 #endif
1037       col = in[l]; bcol = col/bs;
1038       ridx = row % bs; cidx = col % bs;
1039       if (roworiented) {
1040         value = v[l + k*n];
1041       } else {
1042         value = v[k + l*m];
1043       }
1044       if (!sorted) low = 0; high = nrow;
1045       while (high-low > 7) {
1046         t = (low+high)/2;
1047         if (rp[t] > bcol) high = t;
1048         else              low  = t;
1049       }
1050       for (i=low; i<high; i++) {
1051         if (rp[i] > bcol) break;
1052         if (rp[i] == bcol) {
1053           bap  = ap +  bs2*i + bs*cidx + ridx;
1054           if (is == ADD_VALUES) *bap += value;
1055           else                  *bap  = value;
1056           goto noinsert1;
1057         }
1058       }
1059       if (nonew == 1) goto noinsert1;
1060       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1061       if (nrow >= rmax) {
1062         /* there is no extra room in row, therefore enlarge */
1063         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1064         MatScalar *new_a;
1065 
1066         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1067 
1068         /* Malloc new storage space */
1069         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1070 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1071         new_j   = (int*)(new_a + bs2*new_nz);
1072         new_i   = new_j + new_nz;
1073 
1074         /* copy over old data into new slots */
1075         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1076         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1077         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1078         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1079         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1080         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1081         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1082         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1083         /* free up old matrix storage */
1084         ierr = PetscFree(a->a);CHKERRQ(ierr);
1085         if (!a->singlemalloc) {
1086           ierr = PetscFree(a->i);CHKERRQ(ierr);
1087           ierr = PetscFree(a->j);CHKERRQ(ierr);
1088         }
1089         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1090         a->singlemalloc = PETSC_TRUE;
1091 
1092         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1093         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1094         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1095         a->maxnz += bs2*CHUNKSIZE;
1096         a->reallocs++;
1097         a->nz++;
1098       }
1099       N = nrow++ - 1;
1100       /* shift up all the later entries in this row */
1101       for (ii=N; ii>=i; ii--) {
1102         rp[ii+1] = rp[ii];
1103         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1104       }
1105       if (N>=i) {
1106         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1107       }
1108       rp[i]                      = bcol;
1109       ap[bs2*i + bs*cidx + ridx] = value;
1110       noinsert1:;
1111       low = i;
1112     }
1113     ailen[brow] = nrow;
1114   }
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 EXTERN int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,MatLUInfo*,Mat*);
1119 EXTERN int MatLUFactor_SeqBAIJ(Mat,IS,IS,MatLUInfo*);
1120 EXTERN int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1121 EXTERN int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1122 EXTERN int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1123 EXTERN int MatMultTranspose_SeqBAIJ(Mat,Vec,Vec);
1124 EXTERN int MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1125 EXTERN int MatScale_SeqBAIJ(Scalar*,Mat);
1126 EXTERN int MatNorm_SeqBAIJ(Mat,NormType,PetscReal *);
1127 EXTERN int MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*);
1128 EXTERN int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1129 EXTERN int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1130 EXTERN int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1131 EXTERN int MatZeroEntries_SeqBAIJ(Mat);
1132 
1133 EXTERN int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1134 EXTERN int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1135 EXTERN int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1136 EXTERN int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1137 EXTERN int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1138 EXTERN int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1139 EXTERN int MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
1140 EXTERN int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1141 EXTERN int MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec);
1142 EXTERN int MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec);
1143 EXTERN int MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec);
1144 EXTERN int MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec);
1145 EXTERN int MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec);
1146 EXTERN int MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec);
1147 EXTERN int MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec);
1148 
1149 EXTERN int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1150 EXTERN int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1151 EXTERN int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1152 EXTERN int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1153 EXTERN int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1154 EXTERN int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1155 EXTERN int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*);
1156 
1157 EXTERN int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1158 EXTERN int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1159 EXTERN int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1160 EXTERN int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1161 EXTERN int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1162 EXTERN int MatMult_SeqBAIJ_6(Mat,Vec,Vec);
1163 EXTERN int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1164 EXTERN int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1165 
1166 EXTERN int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1167 EXTERN int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1168 EXTERN int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1169 EXTERN int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1170 EXTERN int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1171 EXTERN int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
1172 EXTERN int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1173 EXTERN int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1174 
1175 #undef __FUNCT__
1176 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1177 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1178 {
1179   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1180   Mat         outA;
1181   int         ierr;
1182   PetscTruth  row_identity,col_identity;
1183 
1184   PetscFunctionBegin;
1185   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1186   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1187   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1188   if (!row_identity || !col_identity) {
1189     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1190   }
1191 
1192   outA          = inA;
1193   inA->factor   = FACTOR_LU;
1194 
1195   if (!a->diag) {
1196     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1197   }
1198   /*
1199       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1200       for ILU(0) factorization with natural ordering
1201   */
1202   switch (a->bs) {
1203   case 1:
1204     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1205     inA->ops->solve           = MatSolve_SeqBAIJ_1_NaturalOrdering;
1206     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering;
1207     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
1208   case 2:
1209     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1210     inA->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1211     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering;
1212     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1213     break;
1214   case 3:
1215     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1216     inA->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1217     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_3_NaturalOrdering;
1218     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1219     break;
1220   case 4:
1221 #if defined(PETSC_HAVE_SSE)
1222     {
1223       PetscTruth sse_enabled,single_prec,flg;
1224       ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr);
1225       if (sse_enabled) {
1226         inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
1227         /* What should this prefix be mat_solve, pc_solve, check both? */
1228         ierr = PetscOptionsGetLogical(PETSC_NULL,"-single_precision",&single_prec,&flg);CHKERRQ(ierr);
1229         if (flg) {
1230           if (single_prec) {
1231             a->singleprecisionsolves = PETSC_TRUE;
1232           } else {
1233             a->singleprecisionsolves = PETSC_FALSE;
1234           }
1235         }
1236         if (a->singleprecisionsolves) {
1237           inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering_SSE_Demotion;
1238         } else {
1239           inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1240         }
1241       } else {
1242         inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1243         inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1244       }
1245     }
1246 #else
1247     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1248     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1249 #endif
1250     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_4_NaturalOrdering;
1251     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1252     break;
1253   case 5:
1254     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1255     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1256     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering;
1257     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1258     break;
1259   case 6:
1260     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1261     inA->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1262     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_6_NaturalOrdering;
1263     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1264     break;
1265   case 7:
1266     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1267     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering;
1268     inA->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1269     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1270     break;
1271   default:
1272     a->row        = row;
1273     a->col        = col;
1274     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1275     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1276 
1277     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1278     ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1279     PetscLogObjectParent(inA,a->icol);
1280 
1281     if (!a->solve_work) {
1282       ierr = PetscMalloc((inA->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr);
1283       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(Scalar));
1284     }
1285   }
1286 
1287   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1288 
1289   PetscFunctionReturn(0);
1290 }
1291 #undef __FUNCT__
1292 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1293 int MatPrintHelp_SeqBAIJ(Mat A)
1294 {
1295   static PetscTruth called = PETSC_FALSE;
1296   MPI_Comm          comm = A->comm;
1297   int               ierr;
1298 
1299   PetscFunctionBegin;
1300   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1301   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1302   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 EXTERN_C_BEGIN
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1309 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1310 {
1311   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1312   int         i,nz,n;
1313 
1314   PetscFunctionBegin;
1315   nz = baij->maxnz;
1316   n  = mat->n;
1317   for (i=0; i<nz; i++) {
1318     baij->j[i] = indices[i];
1319   }
1320   baij->nz = nz;
1321   for (i=0; i<n; i++) {
1322     baij->ilen[i] = baij->imax[i];
1323   }
1324 
1325   PetscFunctionReturn(0);
1326 }
1327 EXTERN_C_END
1328 
1329 #undef __FUNCT__
1330 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1331 /*@
1332     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1333        in the matrix.
1334 
1335   Input Parameters:
1336 +  mat - the SeqBAIJ matrix
1337 -  indices - the column indices
1338 
1339   Level: advanced
1340 
1341   Notes:
1342     This can be called if you have precomputed the nonzero structure of the
1343   matrix and want to provide it to the matrix object to improve the performance
1344   of the MatSetValues() operation.
1345 
1346     You MUST have set the correct numbers of nonzeros per row in the call to
1347   MatCreateSeqBAIJ().
1348 
1349     MUST be called before any calls to MatSetValues();
1350 
1351 @*/
1352 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1353 {
1354   int ierr,(*f)(Mat,int *);
1355 
1356   PetscFunctionBegin;
1357   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1358   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr);
1359   if (f) {
1360     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1361   } else {
1362     SETERRQ(1,"Wrong type of matrix to set column indices");
1363   }
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1369 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1370 {
1371   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1372   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1373   PetscReal    atmp;
1374   Scalar       *x,zero = 0.0;
1375   MatScalar    *aa;
1376   int          ncols,brow,krow,kcol;
1377 
1378   PetscFunctionBegin;
1379   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1380   bs   = a->bs;
1381   aa   = a->a;
1382   ai   = a->i;
1383   aj   = a->j;
1384   mbs = a->mbs;
1385 
1386   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1387   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1388   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1389   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1390   for (i=0; i<mbs; i++) {
1391     ncols = ai[1] - ai[0]; ai++;
1392     brow  = bs*i;
1393     for (j=0; j<ncols; j++){
1394       /* bcol = bs*(*aj); */
1395       for (kcol=0; kcol<bs; kcol++){
1396         for (krow=0; krow<bs; krow++){
1397           atmp = PetscAbsScalar(*aa); aa++;
1398           row = brow + krow;    /* row index */
1399           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1400           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1401         }
1402       }
1403       aj++;
1404     }
1405   }
1406   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1412 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1413 {
1414   int        ierr;
1415 
1416   PetscFunctionBegin;
1417   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1423 int MatGetArray_SeqBAIJ(Mat A,Scalar **array)
1424 {
1425   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1426   PetscFunctionBegin;
1427   *array = a->a;
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 #undef __FUNCT__
1432 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1433 int MatRestoreArray_SeqBAIJ(Mat A,Scalar **array)
1434 {
1435   PetscFunctionBegin;
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 /* -------------------------------------------------------------------*/
1440 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1441        MatGetRow_SeqBAIJ,
1442        MatRestoreRow_SeqBAIJ,
1443        MatMult_SeqBAIJ_N,
1444        MatMultAdd_SeqBAIJ_N,
1445        MatMultTranspose_SeqBAIJ,
1446        MatMultTransposeAdd_SeqBAIJ,
1447        MatSolve_SeqBAIJ_N,
1448        0,
1449        0,
1450        0,
1451        MatLUFactor_SeqBAIJ,
1452        0,
1453        0,
1454        MatTranspose_SeqBAIJ,
1455        MatGetInfo_SeqBAIJ,
1456        MatEqual_SeqBAIJ,
1457        MatGetDiagonal_SeqBAIJ,
1458        MatDiagonalScale_SeqBAIJ,
1459        MatNorm_SeqBAIJ,
1460        0,
1461        MatAssemblyEnd_SeqBAIJ,
1462        0,
1463        MatSetOption_SeqBAIJ,
1464        MatZeroEntries_SeqBAIJ,
1465        MatZeroRows_SeqBAIJ,
1466        MatLUFactorSymbolic_SeqBAIJ,
1467        MatLUFactorNumeric_SeqBAIJ_N,
1468        0,
1469        0,
1470        MatSetUpPreallocation_SeqBAIJ,
1471        0,
1472        MatGetOwnershipRange_SeqBAIJ,
1473        MatILUFactorSymbolic_SeqBAIJ,
1474        0,
1475        MatGetArray_SeqBAIJ,
1476        MatRestoreArray_SeqBAIJ,
1477        MatDuplicate_SeqBAIJ,
1478        0,
1479        0,
1480        MatILUFactor_SeqBAIJ,
1481        0,
1482        0,
1483        MatGetSubMatrices_SeqBAIJ,
1484        MatIncreaseOverlap_SeqBAIJ,
1485        MatGetValues_SeqBAIJ,
1486        0,
1487        MatPrintHelp_SeqBAIJ,
1488        MatScale_SeqBAIJ,
1489        0,
1490        0,
1491        0,
1492        MatGetBlockSize_SeqBAIJ,
1493        MatGetRowIJ_SeqBAIJ,
1494        MatRestoreRowIJ_SeqBAIJ,
1495        0,
1496        0,
1497        0,
1498        0,
1499        0,
1500        0,
1501        MatSetValuesBlocked_SeqBAIJ,
1502        MatGetSubMatrix_SeqBAIJ,
1503        MatDestroy_SeqBAIJ,
1504        MatView_SeqBAIJ,
1505        MatGetMaps_Petsc,
1506        0,
1507        0,
1508        0,
1509        0,
1510        0,
1511        0,
1512        MatGetRowMax_SeqBAIJ,
1513        MatConvert_Basic};
1514 
1515 EXTERN_C_BEGIN
1516 #undef __FUNCT__
1517 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1518 int MatStoreValues_SeqBAIJ(Mat mat)
1519 {
1520   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1521   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1522   int         ierr;
1523 
1524   PetscFunctionBegin;
1525   if (aij->nonew != 1) {
1526     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1527   }
1528 
1529   /* allocate space for values if not already there */
1530   if (!aij->saved_values) {
1531     ierr = PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr);
1532   }
1533 
1534   /* copy values over */
1535   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 EXTERN_C_END
1539 
1540 EXTERN_C_BEGIN
1541 #undef __FUNCT__
1542 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1543 int MatRetrieveValues_SeqBAIJ(Mat mat)
1544 {
1545   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1546   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1547 
1548   PetscFunctionBegin;
1549   if (aij->nonew != 1) {
1550     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1551   }
1552   if (!aij->saved_values) {
1553     SETERRQ(1,"Must call MatStoreValues(A);first");
1554   }
1555 
1556   /* copy values over */
1557   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1558   PetscFunctionReturn(0);
1559 }
1560 EXTERN_C_END
1561 
1562 EXTERN_C_BEGIN
1563 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1564 EXTERN_C_END
1565 
1566 EXTERN_C_BEGIN
1567 #undef __FUNCT__
1568 #define __FUNCT__ "MatCreate_SeqBAIJ"
1569 int MatCreate_SeqBAIJ(Mat B)
1570 {
1571   int         ierr,size;
1572   Mat_SeqBAIJ *b;
1573 
1574   PetscFunctionBegin;
1575   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1576   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1577 
1578   B->m = B->M = PetscMax(B->m,B->M);
1579   B->n = B->N = PetscMax(B->n,B->N);
1580   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1581   B->data = (void*)b;
1582   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1583   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1584   B->factor           = 0;
1585   B->lupivotthreshold = 1.0;
1586   B->mapping          = 0;
1587   b->row              = 0;
1588   b->col              = 0;
1589   b->icol             = 0;
1590   b->reallocs         = 0;
1591   b->saved_values     = 0;
1592 #if defined(PETSC_USE_MAT_SINGLE)
1593   b->setvalueslen     = 0;
1594   b->setvaluescopy    = PETSC_NULL;
1595 #endif
1596 
1597   ierr = MapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1598   ierr = MapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1599 
1600   b->sorted           = PETSC_FALSE;
1601   b->roworiented      = PETSC_TRUE;
1602   b->nonew            = 0;
1603   b->diag             = 0;
1604   b->solve_work       = 0;
1605   b->mult_work        = 0;
1606   b->spptr            = 0;
1607   B->info.nz_unneeded = (PetscReal)b->maxnz;
1608   b->keepzeroedrows   = PETSC_FALSE;
1609 
1610   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1611                                      "MatStoreValues_SeqBAIJ",
1612                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1613   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1614                                      "MatRetrieveValues_SeqBAIJ",
1615                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1616   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1617                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1618                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1619   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1620                                      "MatConvert_SeqBAIJ_SeqAIJ",
1621                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1622   PetscFunctionReturn(0);
1623 }
1624 EXTERN_C_END
1625 
1626 #undef __FUNCT__
1627 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1628 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1629 {
1630   Mat         C;
1631   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1632   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1633 
1634   PetscFunctionBegin;
1635   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1636 
1637   *B = 0;
1638   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1639   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1640   c    = (Mat_SeqBAIJ*)C->data;
1641 
1642   c->bs         = a->bs;
1643   c->bs2        = a->bs2;
1644   c->mbs        = a->mbs;
1645   c->nbs        = a->nbs;
1646   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1647 
1648   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1649   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1650   for (i=0; i<mbs; i++) {
1651     c->imax[i] = a->imax[i];
1652     c->ilen[i] = a->ilen[i];
1653   }
1654 
1655   /* allocate the matrix space */
1656   c->singlemalloc = PETSC_TRUE;
1657   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1658   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1659   c->j = (int*)(c->a + nz*bs2);
1660   c->i = c->j + nz;
1661   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1662   if (mbs > 0) {
1663     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1664     if (cpvalues == MAT_COPY_VALUES) {
1665       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1666     } else {
1667       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1668     }
1669   }
1670 
1671   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1672   c->sorted      = a->sorted;
1673   c->roworiented = a->roworiented;
1674   c->nonew       = a->nonew;
1675 
1676   if (a->diag) {
1677     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1678     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1679     for (i=0; i<mbs; i++) {
1680       c->diag[i] = a->diag[i];
1681     }
1682   } else c->diag        = 0;
1683   c->nz                 = a->nz;
1684   c->maxnz              = a->maxnz;
1685   c->solve_work         = 0;
1686   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1687   c->mult_work          = 0;
1688   C->preallocated       = PETSC_TRUE;
1689   C->assembled          = PETSC_TRUE;
1690   *B = C;
1691   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 EXTERN_C_BEGIN
1696 #undef __FUNCT__
1697 #define __FUNCT__ "MatLoad_SeqBAIJ"
1698 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1699 {
1700   Mat_SeqBAIJ  *a;
1701   Mat          B;
1702   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1703   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1704   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1705   int          *masked,nmask,tmp,bs2,ishift;
1706   Scalar       *aa;
1707   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1708 
1709   PetscFunctionBegin;
1710   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1711   bs2  = bs*bs;
1712 
1713   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1714   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1715   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1716   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1717   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1718   M = header[1]; N = header[2]; nz = header[3];
1719 
1720   if (header[3] < 0) {
1721     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1722   }
1723 
1724   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1725 
1726   /*
1727      This code adds extra rows to make sure the number of rows is
1728     divisible by the blocksize
1729   */
1730   mbs        = M/bs;
1731   extra_rows = bs - M + bs*(mbs);
1732   if (extra_rows == bs) extra_rows = 0;
1733   else                  mbs++;
1734   if (extra_rows) {
1735     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1736   }
1737 
1738   /* read in row lengths */
1739   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1740   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1741   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1742 
1743   /* read in column indices */
1744   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1745   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1746   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1747 
1748   /* loop over row lengths determining block row lengths */
1749   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1750   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1751   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1752   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1753   masked   = mask + mbs;
1754   rowcount = 0; nzcount = 0;
1755   for (i=0; i<mbs; i++) {
1756     nmask = 0;
1757     for (j=0; j<bs; j++) {
1758       kmax = rowlengths[rowcount];
1759       for (k=0; k<kmax; k++) {
1760         tmp = jj[nzcount++]/bs;
1761         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1762       }
1763       rowcount++;
1764     }
1765     browlengths[i] += nmask;
1766     /* zero out the mask elements we set */
1767     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1768   }
1769 
1770   /* create our matrix */
1771   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1772   B = *A;
1773   a = (Mat_SeqBAIJ*)B->data;
1774 
1775   /* set matrix "i" values */
1776   a->i[0] = 0;
1777   for (i=1; i<= mbs; i++) {
1778     a->i[i]      = a->i[i-1] + browlengths[i-1];
1779     a->ilen[i-1] = browlengths[i-1];
1780   }
1781   a->nz         = 0;
1782   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1783 
1784   /* read in nonzero values */
1785   ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr);
1786   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1787   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1788 
1789   /* set "a" and "j" values into matrix */
1790   nzcount = 0; jcount = 0;
1791   for (i=0; i<mbs; i++) {
1792     nzcountb = nzcount;
1793     nmask    = 0;
1794     for (j=0; j<bs; j++) {
1795       kmax = rowlengths[i*bs+j];
1796       for (k=0; k<kmax; k++) {
1797         tmp = jj[nzcount++]/bs;
1798 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1799       }
1800     }
1801     /* sort the masked values */
1802     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1803 
1804     /* set "j" values into matrix */
1805     maskcount = 1;
1806     for (j=0; j<nmask; j++) {
1807       a->j[jcount++]  = masked[j];
1808       mask[masked[j]] = maskcount++;
1809     }
1810     /* set "a" values into matrix */
1811     ishift = bs2*a->i[i];
1812     for (j=0; j<bs; j++) {
1813       kmax = rowlengths[i*bs+j];
1814       for (k=0; k<kmax; k++) {
1815         tmp       = jj[nzcountb]/bs ;
1816         block     = mask[tmp] - 1;
1817         point     = jj[nzcountb] - bs*tmp;
1818         idx       = ishift + bs2*block + j + bs*point;
1819         a->a[idx] = (MatScalar)aa[nzcountb++];
1820       }
1821     }
1822     /* zero out the mask elements we set */
1823     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1824   }
1825   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1826 
1827   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1828   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1829   ierr = PetscFree(aa);CHKERRQ(ierr);
1830   ierr = PetscFree(jj);CHKERRQ(ierr);
1831   ierr = PetscFree(mask);CHKERRQ(ierr);
1832 
1833   B->assembled = PETSC_TRUE;
1834 
1835   ierr = MatView_Private(B);CHKERRQ(ierr);
1836   PetscFunctionReturn(0);
1837 }
1838 EXTERN_C_END
1839 
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatCreateSeqBAIJ"
1842 /*@C
1843    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1844    compressed row) format.  For good matrix assembly performance the
1845    user should preallocate the matrix storage by setting the parameter nz
1846    (or the array nnz).  By setting these parameters accurately, performance
1847    during matrix assembly can be increased by more than a factor of 50.
1848 
1849    Collective on MPI_Comm
1850 
1851    Input Parameters:
1852 +  comm - MPI communicator, set to PETSC_COMM_SELF
1853 .  bs - size of block
1854 .  m - number of rows
1855 .  n - number of columns
1856 .  nz - number of nonzero blocks  per block row (same for all rows)
1857 -  nnz - array containing the number of nonzero blocks in the various block rows
1858          (possibly different for each block row) or PETSC_NULL
1859 
1860    Output Parameter:
1861 .  A - the matrix
1862 
1863    Options Database Keys:
1864 .   -mat_no_unroll - uses code that does not unroll the loops in the
1865                      block calculations (much slower)
1866 .    -mat_block_size - size of the blocks to use
1867 
1868    Level: intermediate
1869 
1870    Notes:
1871    A nonzero block is any block that as 1 or more nonzeros in it
1872 
1873    The block AIJ format is fully compatible with standard Fortran 77
1874    storage.  That is, the stored row and column indices can begin at
1875    either one (as in Fortran) or zero.  See the users' manual for details.
1876 
1877    Specify the preallocated storage with either nz or nnz (not both).
1878    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1879    allocation.  For additional details, see the users manual chapter on
1880    matrices.
1881 
1882 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1883 @*/
1884 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1885 {
1886   int ierr;
1887 
1888   PetscFunctionBegin;
1889   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1890   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
1891   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1892   PetscFunctionReturn(0);
1893 }
1894 
1895 #undef __FUNCT__
1896 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
1897 /*@C
1898    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1899    per row in the matrix. For good matrix assembly performance the
1900    user should preallocate the matrix storage by setting the parameter nz
1901    (or the array nnz).  By setting these parameters accurately, performance
1902    during matrix assembly can be increased by more than a factor of 50.
1903 
1904    Collective on MPI_Comm
1905 
1906    Input Parameters:
1907 +  A - the matrix
1908 .  bs - size of block
1909 .  nz - number of block nonzeros per block row (same for all rows)
1910 -  nnz - array containing the number of block nonzeros in the various block rows
1911          (possibly different for each block row) or PETSC_NULL
1912 
1913    Options Database Keys:
1914 .   -mat_no_unroll - uses code that does not unroll the loops in the
1915                      block calculations (much slower)
1916 .    -mat_block_size - size of the blocks to use
1917 
1918    Level: intermediate
1919 
1920    Notes:
1921    The block AIJ format is fully compatible with standard Fortran 77
1922    storage.  That is, the stored row and column indices can begin at
1923    either one (as in Fortran) or zero.  See the users' manual for details.
1924 
1925    Specify the preallocated storage with either nz or nnz (not both).
1926    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1927    allocation.  For additional details, see the users manual chapter on
1928    matrices.
1929 
1930 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1931 @*/
1932 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1933 {
1934   Mat_SeqBAIJ *b;
1935   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1936   PetscTruth  flg;
1937 
1938   PetscFunctionBegin;
1939   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr);
1940   if (!flg) PetscFunctionReturn(0);
1941 
1942   B->preallocated = PETSC_TRUE;
1943   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1944   if (nnz && newbs != bs) {
1945     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1946   }
1947   bs   = newbs;
1948 
1949   mbs  = B->m/bs;
1950   nbs  = B->n/bs;
1951   bs2  = bs*bs;
1952 
1953   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1954     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1955   }
1956 
1957   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1958   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1959   if (nnz) {
1960     for (i=0; i<mbs; i++) {
1961       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1962       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1963     }
1964   }
1965 
1966   b       = (Mat_SeqBAIJ*)B->data;
1967   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1968   if (!flg) {
1969     switch (bs) {
1970     case 1:
1971       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1972       B->ops->solve           = MatSolve_SeqBAIJ_1;
1973       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_1;
1974       B->ops->mult            = MatMult_SeqBAIJ_1;
1975       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1976       break;
1977     case 2:
1978       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1979       B->ops->solve           = MatSolve_SeqBAIJ_2;
1980       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_2;
1981       B->ops->mult            = MatMult_SeqBAIJ_2;
1982       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1983       break;
1984     case 3:
1985       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1986       B->ops->solve           = MatSolve_SeqBAIJ_3;
1987       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_3;
1988       B->ops->mult            = MatMult_SeqBAIJ_3;
1989       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1990       break;
1991     case 4:
1992       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1993       B->ops->solve           = MatSolve_SeqBAIJ_4;
1994       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_4;
1995       B->ops->mult            = MatMult_SeqBAIJ_4;
1996       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1997       break;
1998     case 5:
1999       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2000       B->ops->solve           = MatSolve_SeqBAIJ_5;
2001       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_5;
2002       B->ops->mult            = MatMult_SeqBAIJ_5;
2003       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2004       break;
2005     case 6:
2006       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2007       B->ops->solve           = MatSolve_SeqBAIJ_6;
2008       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_6;
2009       B->ops->mult            = MatMult_SeqBAIJ_6;
2010       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2011       break;
2012     case 7:
2013       B->ops->mult            = MatMult_SeqBAIJ_7;
2014       B->ops->solve           = MatSolve_SeqBAIJ_7;
2015       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_7;
2016       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2017       break;
2018     default:
2019       B->ops->mult            = MatMult_SeqBAIJ_N;
2020       B->ops->solve           = MatSolve_SeqBAIJ_N;
2021       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2022       break;
2023     }
2024   }
2025   b->bs      = bs;
2026   b->mbs     = mbs;
2027   b->nbs     = nbs;
2028   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
2029   if (!nnz) {
2030     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2031     else if (nz <= 0)        nz = 1;
2032     for (i=0; i<mbs; i++) b->imax[i] = nz;
2033     nz = nz*mbs;
2034   } else {
2035     nz = 0;
2036     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2037   }
2038 
2039   /* allocate the matrix space */
2040   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
2041   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2042   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2043   b->j  = (int*)(b->a + nz*bs2);
2044   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
2045   b->i  = b->j + nz;
2046   b->singlemalloc = PETSC_TRUE;
2047 
2048   b->i[0] = 0;
2049   for (i=1; i<mbs+1; i++) {
2050     b->i[i] = b->i[i-1] + b->imax[i-1];
2051   }
2052 
2053   /* b->ilen will count nonzeros in each block row so far. */
2054   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
2055   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2056   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2057 
2058   b->bs               = bs;
2059   b->bs2              = bs2;
2060   b->mbs              = mbs;
2061   b->nz               = 0;
2062   b->maxnz            = nz*bs2;
2063   B->info.nz_unneeded = (PetscReal)b->maxnz;
2064   PetscFunctionReturn(0);
2065 }
2066 
2067