xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 2ffa5d78833ea073cb9078e022579916a4e55a12)
1 /*$Id: baij.c,v 1.232 2001/07/11 19:12:27 buschelm Exp buschelm $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "petscsys.h"                     /*I "petscmat.h" I*/
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 
12 /*  UGLY, ugly, ugly
13    When MatScalar == Scalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
14    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
15    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
16    converts the entries into single precision and then calls ..._MatScalar() to put them
17    into the single precision data structures.
18 */
19 #if defined(PETSC_USE_MAT_SINGLE)
20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
21 #else
22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
23 #endif
24 
25 #define CHUNKSIZE  10
26 
27 /*
28      Checks for missing diagonals
29 */
30 #undef __FUNCT__
31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
32 int MatMissingDiagonal_SeqBAIJ(Mat A)
33 {
34   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
35   int         *diag,*jj = a->j,i,ierr;
36 
37   PetscFunctionBegin;
38   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
39   diag = a->diag;
40   for (i=0; i<a->mbs; i++) {
41     if (jj[diag[i]] != i) {
42       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
43     }
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
50 int MatMarkDiagonal_SeqBAIJ(Mat A)
51 {
52   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
53   int         i,j,*diag,m = a->mbs,ierr;
54 
55   PetscFunctionBegin;
56   if (a->diag) PetscFunctionReturn(0);
57 
58   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
59   PetscLogObjectMemory(A,(m+1)*sizeof(int));
60   for (i=0; i<m; i++) {
61     diag[i] = a->i[i+1];
62     for (j=a->i[i]; j<a->i[i+1]; j++) {
63       if (a->j[j] == i) {
64         diag[i] = j;
65         break;
66       }
67     }
68   }
69   a->diag = diag;
70   PetscFunctionReturn(0);
71 }
72 
73 
74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
79 {
80   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
81   int         ierr,n = a->mbs,i;
82 
83   PetscFunctionBegin;
84   *nn = n;
85   if (!ia) PetscFunctionReturn(0);
86   if (symmetric) {
87     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
88   } else if (oshift == 1) {
89     /* temporarily add 1 to i and j indices */
90     int nz = a->i[n];
91     for (i=0; i<nz; i++) a->j[i]++;
92     for (i=0; i<n+1; i++) a->i[i]++;
93     *ia = a->i; *ja = a->j;
94   } else {
95     *ia = a->i; *ja = a->j;
96   }
97 
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
104 {
105   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
106   int         i,n = a->mbs,ierr;
107 
108   PetscFunctionBegin;
109   if (!ia) PetscFunctionReturn(0);
110   if (symmetric) {
111     ierr = PetscFree(*ia);CHKERRQ(ierr);
112     ierr = PetscFree(*ja);CHKERRQ(ierr);
113   } else if (oshift == 1) {
114     int nz = a->i[n]-1;
115     for (i=0; i<nz; i++) a->j[i]--;
116     for (i=0; i<n+1; i++) a->i[i]--;
117   }
118   PetscFunctionReturn(0);
119 }
120 
121 #undef __FUNCT__
122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
124 {
125   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
126 
127   PetscFunctionBegin;
128   *bs = baij->bs;
129   PetscFunctionReturn(0);
130 }
131 
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatDestroy_SeqBAIJ"
135 int MatDestroy_SeqBAIJ(Mat A)
136 {
137   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
138   int         ierr;
139 
140   PetscFunctionBegin;
141 #if defined(PETSC_USE_LOG)
142   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
143 #endif
144   ierr = PetscFree(a->a);CHKERRQ(ierr);
145   if (!a->singlemalloc) {
146     ierr = PetscFree(a->i);CHKERRQ(ierr);
147     ierr = PetscFree(a->j);CHKERRQ(ierr);
148   }
149   if (a->row) {
150     ierr = ISDestroy(a->row);CHKERRQ(ierr);
151   }
152   if (a->col) {
153     ierr = ISDestroy(a->col);CHKERRQ(ierr);
154   }
155   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
156   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
157   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
158   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
159   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
160   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
161   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
162 #if defined(PETSC_USE_MAT_SINGLE)
163   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
164 #endif
165   ierr = PetscFree(a);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatSetOption_SeqBAIJ"
171 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
172 {
173   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
174 
175   PetscFunctionBegin;
176   switch (op) {
177   case MAT_ROW_ORIENTED:
178     a->roworiented    = PETSC_TRUE;
179     break;
180   case MAT_COLUMN_ORIENTED:
181     a->roworiented    = PETSC_FALSE;
182     break;
183   case MAT_COLUMNS_SORTED:
184     a->sorted         = PETSC_TRUE;
185     break;
186   case MAT_COLUMNS_UNSORTED:
187     a->sorted         = PETSC_FALSE;
188     break;
189   case MAT_KEEP_ZEROED_ROWS:
190     a->keepzeroedrows = PETSC_TRUE;
191     break;
192   case MAT_NO_NEW_NONZERO_LOCATIONS:
193     a->nonew          = 1;
194     break;
195   case MAT_NEW_NONZERO_LOCATION_ERR:
196     a->nonew          = -1;
197     break;
198   case MAT_NEW_NONZERO_ALLOCATION_ERR:
199     a->nonew          = -2;
200     break;
201   case MAT_YES_NEW_NONZERO_LOCATIONS:
202     a->nonew          = 0;
203     break;
204   case MAT_ROWS_SORTED:
205   case MAT_ROWS_UNSORTED:
206   case MAT_YES_NEW_DIAGONALS:
207   case MAT_IGNORE_OFF_PROC_ENTRIES:
208   case MAT_USE_HASH_TABLE:
209     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
210     break;
211   case MAT_NO_NEW_DIAGONALS:
212     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
213     break;
214   case MAT_USE_SINGLE_PRECISION_SOLVES:
215 #if defined(PETSC_USE_MAT_SINGLE)
216     if (a->bs==4) {
217       PetscTruth sse_enabled;
218       int ierr;
219       ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr);
220       if (sse_enabled) {
221         a->singleprecisionsolves = PETSC_TRUE;
222       } else {
223         PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
224       }
225     } else {
226       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
227     }
228 #else
229     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
230 #endif
231     break;
232   default:
233     SETERRQ(PETSC_ERR_SUP,"unknown option");
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ"
240 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
241 {
242   PetscFunctionBegin;
243   if (m) *m = 0;
244   if (n) *n = A->m;
245   PetscFunctionReturn(0);
246 }
247 
248 #undef __FUNCT__
249 #define __FUNCT__ "MatGetRow_SeqBAIJ"
250 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
251 {
252   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
253   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
254   MatScalar    *aa,*aa_i;
255   Scalar       *v_i;
256 
257   PetscFunctionBegin;
258   bs  = a->bs;
259   ai  = a->i;
260   aj  = a->j;
261   aa  = a->a;
262   bs2 = a->bs2;
263 
264   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
265 
266   bn  = row/bs;   /* Block number */
267   bp  = row % bs; /* Block Position */
268   M   = ai[bn+1] - ai[bn];
269   *nz = bs*M;
270 
271   if (v) {
272     *v = 0;
273     if (*nz) {
274       ierr = PetscMalloc((*nz)*sizeof(Scalar),v);CHKERRQ(ierr);
275       for (i=0; i<M; i++) { /* for each block in the block row */
276         v_i  = *v + i*bs;
277         aa_i = aa + bs2*(ai[bn] + i);
278         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
279       }
280     }
281   }
282 
283   if (idx) {
284     *idx = 0;
285     if (*nz) {
286       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
287       for (i=0; i<M; i++) { /* for each block in the block row */
288         idx_i = *idx + i*bs;
289         itmp  = bs*aj[ai[bn] + i];
290         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
291       }
292     }
293   }
294   PetscFunctionReturn(0);
295 }
296 
297 #undef __FUNCT__
298 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
299 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
300 {
301   int ierr;
302 
303   PetscFunctionBegin;
304   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
305   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatTranspose_SeqBAIJ"
311 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
312 {
313   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
314   Mat         C;
315   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
316   int         *rows,*cols,bs2=a->bs2;
317   Scalar      *array;
318 
319   PetscFunctionBegin;
320   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
321   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
322   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
323 
324 #if defined(PETSC_USE_MAT_SINGLE)
325   ierr = PetscMalloc(a->bs2*a->nz*sizeof(Scalar),&array);CHKERRQ(ierr);
326   for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i];
327 #else
328   array = a->a;
329 #endif
330 
331   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
332   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
333   ierr = PetscFree(col);CHKERRQ(ierr);
334   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
335   cols = rows + bs;
336   for (i=0; i<mbs; i++) {
337     cols[0] = i*bs;
338     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
339     len = ai[i+1] - ai[i];
340     for (j=0; j<len; j++) {
341       rows[0] = (*aj++)*bs;
342       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
343       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
344       array += bs2;
345     }
346   }
347   ierr = PetscFree(rows);CHKERRQ(ierr);
348 #if defined(PETSC_USE_MAT_SINGLE)
349   ierr = PetscFree(array);
350 #endif
351 
352   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
353   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
354 
355   if (B) {
356     *B = C;
357   } else {
358     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
359   }
360   PetscFunctionReturn(0);
361 }
362 
363 #undef __FUNCT__
364 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
365 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
366 {
367   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
368   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
369   Scalar      *aa;
370   FILE        *file;
371 
372   PetscFunctionBegin;
373   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
374   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
375   col_lens[0] = MAT_COOKIE;
376 
377   col_lens[1] = A->m;
378   col_lens[2] = A->n;
379   col_lens[3] = a->nz*bs2;
380 
381   /* store lengths of each row and write (including header) to file */
382   count = 0;
383   for (i=0; i<a->mbs; i++) {
384     for (j=0; j<bs; j++) {
385       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
386     }
387   }
388   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
389   ierr = PetscFree(col_lens);CHKERRQ(ierr);
390 
391   /* store column indices (zero start index) */
392   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
393   count = 0;
394   for (i=0; i<a->mbs; i++) {
395     for (j=0; j<bs; j++) {
396       for (k=a->i[i]; k<a->i[i+1]; k++) {
397         for (l=0; l<bs; l++) {
398           jj[count++] = bs*a->j[k] + l;
399         }
400       }
401     }
402   }
403   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
404   ierr = PetscFree(jj);CHKERRQ(ierr);
405 
406   /* store nonzero values */
407   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr);
408   count = 0;
409   for (i=0; i<a->mbs; i++) {
410     for (j=0; j<bs; j++) {
411       for (k=a->i[i]; k<a->i[i+1]; k++) {
412         for (l=0; l<bs; l++) {
413           aa[count++] = a->a[bs2*k + l*bs + j];
414         }
415       }
416     }
417   }
418   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
419   ierr = PetscFree(aa);CHKERRQ(ierr);
420 
421   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
422   if (file) {
423     fprintf(file,"-matload_block_size %d\n",a->bs);
424   }
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
430 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
431 {
432   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
433   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
434   PetscViewerFormat format;
435 
436   PetscFunctionBegin;
437   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
438   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
439     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
440   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
441     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
442   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
443     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
444     for (i=0; i<a->mbs; i++) {
445       for (j=0; j<bs; j++) {
446         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
447         for (k=a->i[i]; k<a->i[i+1]; k++) {
448           for (l=0; l<bs; l++) {
449 #if defined(PETSC_USE_COMPLEX)
450             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
451               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l,
452                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
453             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
454               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l,
455                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
456             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
457               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
458             }
459 #else
460             if (a->a[bs2*k + l*bs + j] != 0.0) {
461               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
462             }
463 #endif
464           }
465         }
466         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
467       }
468     }
469     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
470   } else {
471     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
472     for (i=0; i<a->mbs; i++) {
473       for (j=0; j<bs; j++) {
474         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
475         for (k=a->i[i]; k<a->i[i+1]; k++) {
476           for (l=0; l<bs; l++) {
477 #if defined(PETSC_USE_COMPLEX)
478             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
479               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
480                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
481             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
482               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
483                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
484             } else {
485               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
486             }
487 #else
488             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
489 #endif
490           }
491         }
492         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
493       }
494     }
495     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
496   }
497   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
498   PetscFunctionReturn(0);
499 }
500 
501 #undef __FUNCT__
502 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
503 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
504 {
505   Mat          A = (Mat) Aa;
506   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
507   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
508   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
509   MatScalar    *aa;
510   PetscViewer  viewer;
511 
512   PetscFunctionBegin;
513 
514   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
515   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
516 
517   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
518 
519   /* loop over matrix elements drawing boxes */
520   color = PETSC_DRAW_BLUE;
521   for (i=0,row=0; i<mbs; i++,row+=bs) {
522     for (j=a->i[i]; j<a->i[i+1]; j++) {
523       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
524       x_l = a->j[j]*bs; x_r = x_l + 1.0;
525       aa = a->a + j*bs2;
526       for (k=0; k<bs; k++) {
527         for (l=0; l<bs; l++) {
528           if (PetscRealPart(*aa++) >=  0.) continue;
529           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
530         }
531       }
532     }
533   }
534   color = PETSC_DRAW_CYAN;
535   for (i=0,row=0; i<mbs; i++,row+=bs) {
536     for (j=a->i[i]; j<a->i[i+1]; j++) {
537       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
538       x_l = a->j[j]*bs; x_r = x_l + 1.0;
539       aa = a->a + j*bs2;
540       for (k=0; k<bs; k++) {
541         for (l=0; l<bs; l++) {
542           if (PetscRealPart(*aa++) != 0.) continue;
543           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
544         }
545       }
546     }
547   }
548 
549   color = PETSC_DRAW_RED;
550   for (i=0,row=0; i<mbs; i++,row+=bs) {
551     for (j=a->i[i]; j<a->i[i+1]; j++) {
552       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
553       x_l = a->j[j]*bs; x_r = x_l + 1.0;
554       aa = a->a + j*bs2;
555       for (k=0; k<bs; k++) {
556         for (l=0; l<bs; l++) {
557           if (PetscRealPart(*aa++) <= 0.) continue;
558           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
559         }
560       }
561     }
562   }
563   PetscFunctionReturn(0);
564 }
565 
566 #undef __FUNCT__
567 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
568 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
569 {
570   int          ierr;
571   PetscReal    xl,yl,xr,yr,w,h;
572   PetscDraw    draw;
573   PetscTruth   isnull;
574 
575   PetscFunctionBegin;
576 
577   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
578   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
579 
580   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
581   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
582   xr += w;    yr += h;  xl = -w;     yl = -h;
583   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
584   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
585   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatView_SeqBAIJ"
591 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
592 {
593   int        ierr;
594   PetscTruth issocket,isascii,isbinary,isdraw;
595 
596   PetscFunctionBegin;
597   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
598   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
599   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
600   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
601   if (issocket) {
602     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
603   } else if (isascii){
604     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
605   } else if (isbinary) {
606     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
607   } else if (isdraw) {
608     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
609   } else {
610     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
611   }
612   PetscFunctionReturn(0);
613 }
614 
615 
616 #undef __FUNCT__
617 #define __FUNCT__ "MatGetValues_SeqBAIJ"
618 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
619 {
620   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
621   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
622   int        *ai = a->i,*ailen = a->ilen;
623   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
624   MatScalar  *ap,*aa = a->a,zero = 0.0;
625 
626   PetscFunctionBegin;
627   for (k=0; k<m; k++) { /* loop over rows */
628     row  = im[k]; brow = row/bs;
629     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
630     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
631     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
632     nrow = ailen[brow];
633     for (l=0; l<n; l++) { /* loop over columns */
634       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
635       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
636       col  = in[l] ;
637       bcol = col/bs;
638       cidx = col%bs;
639       ridx = row%bs;
640       high = nrow;
641       low  = 0; /* assume unsorted */
642       while (high-low > 5) {
643         t = (low+high)/2;
644         if (rp[t] > bcol) high = t;
645         else             low  = t;
646       }
647       for (i=low; i<high; i++) {
648         if (rp[i] > bcol) break;
649         if (rp[i] == bcol) {
650           *v++ = ap[bs2*i+bs*cidx+ridx];
651           goto finished;
652         }
653       }
654       *v++ = zero;
655       finished:;
656     }
657   }
658   PetscFunctionReturn(0);
659 }
660 
661 #if defined(PETSC_USE_MAT_SINGLE)
662 #undef __FUNCT__
663 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
664 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
665 {
666   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
667   int         ierr,i,N = m*n*b->bs2;
668   MatScalar   *vsingle;
669 
670   PetscFunctionBegin;
671   if (N > b->setvalueslen) {
672     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
673     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
674     b->setvalueslen  = N;
675   }
676   vsingle = b->setvaluescopy;
677   for (i=0; i<N; i++) {
678     vsingle[i] = v[i];
679   }
680   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
681   PetscFunctionReturn(0);
682 }
683 #endif
684 
685 
686 #undef __FUNCT__
687 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
688 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
689 {
690   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
691   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
692   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
693   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
694   PetscTruth  roworiented=a->roworiented;
695   MatScalar   *value = v,*ap,*aa = a->a,*bap;
696 
697   PetscFunctionBegin;
698   if (roworiented) {
699     stepval = (n-1)*bs;
700   } else {
701     stepval = (m-1)*bs;
702   }
703   for (k=0; k<m; k++) { /* loop over added rows */
704     row  = im[k];
705     if (row < 0) continue;
706 #if defined(PETSC_USE_BOPT_g)
707     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
708 #endif
709     rp   = aj + ai[row];
710     ap   = aa + bs2*ai[row];
711     rmax = imax[row];
712     nrow = ailen[row];
713     low  = 0;
714     for (l=0; l<n; l++) { /* loop over added columns */
715       if (in[l] < 0) continue;
716 #if defined(PETSC_USE_BOPT_g)
717       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
718 #endif
719       col = in[l];
720       if (roworiented) {
721         value = v + k*(stepval+bs)*bs + l*bs;
722       } else {
723         value = v + l*(stepval+bs)*bs + k*bs;
724       }
725       if (!sorted) low = 0; high = nrow;
726       while (high-low > 7) {
727         t = (low+high)/2;
728         if (rp[t] > col) high = t;
729         else             low  = t;
730       }
731       for (i=low; i<high; i++) {
732         if (rp[i] > col) break;
733         if (rp[i] == col) {
734           bap  = ap +  bs2*i;
735           if (roworiented) {
736             if (is == ADD_VALUES) {
737               for (ii=0; ii<bs; ii++,value+=stepval) {
738                 for (jj=ii; jj<bs2; jj+=bs) {
739                   bap[jj] += *value++;
740                 }
741               }
742             } else {
743               for (ii=0; ii<bs; ii++,value+=stepval) {
744                 for (jj=ii; jj<bs2; jj+=bs) {
745                   bap[jj] = *value++;
746                 }
747               }
748             }
749           } else {
750             if (is == ADD_VALUES) {
751               for (ii=0; ii<bs; ii++,value+=stepval) {
752                 for (jj=0; jj<bs; jj++) {
753                   *bap++ += *value++;
754                 }
755               }
756             } else {
757               for (ii=0; ii<bs; ii++,value+=stepval) {
758                 for (jj=0; jj<bs; jj++) {
759                   *bap++  = *value++;
760                 }
761               }
762             }
763           }
764           goto noinsert2;
765         }
766       }
767       if (nonew == 1) goto noinsert2;
768       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
769       if (nrow >= rmax) {
770         /* there is no extra room in row, therefore enlarge */
771         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
772         MatScalar *new_a;
773 
774         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
775 
776         /* malloc new storage space */
777         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
778 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
779         new_j   = (int*)(new_a + bs2*new_nz);
780         new_i   = new_j + new_nz;
781 
782         /* copy over old data into new slots */
783         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
784         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
785         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
786         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
787         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
788         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
789         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
790         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
791         /* free up old matrix storage */
792         ierr = PetscFree(a->a);CHKERRQ(ierr);
793         if (!a->singlemalloc) {
794           ierr = PetscFree(a->i);CHKERRQ(ierr);
795           ierr = PetscFree(a->j);CHKERRQ(ierr);
796         }
797         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
798         a->singlemalloc = PETSC_TRUE;
799 
800         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
801         rmax = imax[row] = imax[row] + CHUNKSIZE;
802         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
803         a->maxnz += bs2*CHUNKSIZE;
804         a->reallocs++;
805         a->nz++;
806       }
807       N = nrow++ - 1;
808       /* shift up all the later entries in this row */
809       for (ii=N; ii>=i; ii--) {
810         rp[ii+1] = rp[ii];
811         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
812       }
813       if (N >= i) {
814         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
815       }
816       rp[i] = col;
817       bap   = ap +  bs2*i;
818       if (roworiented) {
819         for (ii=0; ii<bs; ii++,value+=stepval) {
820           for (jj=ii; jj<bs2; jj+=bs) {
821             bap[jj] = *value++;
822           }
823         }
824       } else {
825         for (ii=0; ii<bs; ii++,value+=stepval) {
826           for (jj=0; jj<bs; jj++) {
827             *bap++  = *value++;
828           }
829         }
830       }
831       noinsert2:;
832       low = i;
833     }
834     ailen[row] = nrow;
835   }
836   PetscFunctionReturn(0);
837 }
838 
839 
840 #undef __FUNCT__
841 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
842 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
843 {
844   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
845   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
846   int        m = A->m,*ip,N,*ailen = a->ilen;
847   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
848   MatScalar  *aa = a->a,*ap;
849 
850   PetscFunctionBegin;
851   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
852 
853   if (m) rmax = ailen[0];
854   for (i=1; i<mbs; i++) {
855     /* move each row back by the amount of empty slots (fshift) before it*/
856     fshift += imax[i-1] - ailen[i-1];
857     rmax   = PetscMax(rmax,ailen[i]);
858     if (fshift) {
859       ip = aj + ai[i]; ap = aa + bs2*ai[i];
860       N = ailen[i];
861       for (j=0; j<N; j++) {
862         ip[j-fshift] = ip[j];
863         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
864       }
865     }
866     ai[i] = ai[i-1] + ailen[i-1];
867   }
868   if (mbs) {
869     fshift += imax[mbs-1] - ailen[mbs-1];
870     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
871   }
872   /* reset ilen and imax for each row */
873   for (i=0; i<mbs; i++) {
874     ailen[i] = imax[i] = ai[i+1] - ai[i];
875   }
876   a->nz = ai[mbs];
877 
878   /* diagonals may have moved, so kill the diagonal pointers */
879   if (fshift && a->diag) {
880     ierr = PetscFree(a->diag);CHKERRQ(ierr);
881     PetscLogObjectMemory(A,-(m+1)*sizeof(int));
882     a->diag = 0;
883   }
884   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
885            m,A->n,a->bs,fshift*bs2,a->nz*bs2);
886   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
887            a->reallocs);
888   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
889   a->reallocs          = 0;
890   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
891 
892   PetscFunctionReturn(0);
893 }
894 
895 
896 
897 /*
898    This function returns an array of flags which indicate the locations of contiguous
899    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
900    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
901    Assume: sizes should be long enough to hold all the values.
902 */
903 #undef __FUNCT__
904 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
905 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
906 {
907   int        i,j,k,row;
908   PetscTruth flg;
909 
910   PetscFunctionBegin;
911   for (i=0,j=0; i<n; j++) {
912     row = idx[i];
913     if (row%bs!=0) { /* Not the begining of a block */
914       sizes[j] = 1;
915       i++;
916     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
917       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
918       i++;
919     } else { /* Begining of the block, so check if the complete block exists */
920       flg = PETSC_TRUE;
921       for (k=1; k<bs; k++) {
922         if (row+k != idx[i+k]) { /* break in the block */
923           flg = PETSC_FALSE;
924           break;
925         }
926       }
927       if (flg == PETSC_TRUE) { /* No break in the bs */
928         sizes[j] = bs;
929         i+= bs;
930       } else {
931         sizes[j] = 1;
932         i++;
933       }
934     }
935   }
936   *bs_max = j;
937   PetscFunctionReturn(0);
938 }
939 
940 #undef __FUNCT__
941 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
942 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag)
943 {
944   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
945   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
946   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
947   Scalar      zero = 0.0;
948   MatScalar   *aa;
949 
950   PetscFunctionBegin;
951   /* Make a copy of the IS and  sort it */
952   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
953   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
954 
955   /* allocate memory for rows,sizes */
956   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
957   sizes = rows + is_n;
958 
959   /* copy IS values to rows, and sort them */
960   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
961   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
962   if (baij->keepzeroedrows) {
963     for (i=0; i<is_n; i++) { sizes[i] = 1; }
964     bs_max = is_n;
965   } else {
966     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
967   }
968   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
969 
970   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
971     row   = rows[j];
972     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
973     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
974     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
975     if (sizes[i] == bs && !baij->keepzeroedrows) {
976       if (diag) {
977         if (baij->ilen[row/bs] > 0) {
978           baij->ilen[row/bs]       = 1;
979           baij->j[baij->i[row/bs]] = row/bs;
980           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
981         }
982         /* Now insert all the diagonal values for this bs */
983         for (k=0; k<bs; k++) {
984           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
985         }
986       } else { /* (!diag) */
987         baij->ilen[row/bs] = 0;
988       } /* end (!diag) */
989     } else { /* (sizes[i] != bs) */
990 #if defined (PETSC_USE_DEBUG)
991       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
992 #endif
993       for (k=0; k<count; k++) {
994         aa[0] =  zero;
995         aa    += bs;
996       }
997       if (diag) {
998         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
999       }
1000     }
1001   }
1002 
1003   ierr = PetscFree(rows);CHKERRQ(ierr);
1004   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 #undef __FUNCT__
1009 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1010 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
1011 {
1012   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1013   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1014   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1015   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1016   int         ridx,cidx,bs2=a->bs2,ierr;
1017   PetscTruth  roworiented=a->roworiented;
1018   MatScalar   *ap,value,*aa=a->a,*bap;
1019 
1020   PetscFunctionBegin;
1021   for (k=0; k<m; k++) { /* loop over added rows */
1022     row  = im[k]; brow = row/bs;
1023     if (row < 0) continue;
1024 #if defined(PETSC_USE_BOPT_g)
1025     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1026 #endif
1027     rp   = aj + ai[brow];
1028     ap   = aa + bs2*ai[brow];
1029     rmax = imax[brow];
1030     nrow = ailen[brow];
1031     low  = 0;
1032     for (l=0; l<n; l++) { /* loop over added columns */
1033       if (in[l] < 0) continue;
1034 #if defined(PETSC_USE_BOPT_g)
1035       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1036 #endif
1037       col = in[l]; bcol = col/bs;
1038       ridx = row % bs; cidx = col % bs;
1039       if (roworiented) {
1040         value = v[l + k*n];
1041       } else {
1042         value = v[k + l*m];
1043       }
1044       if (!sorted) low = 0; high = nrow;
1045       while (high-low > 7) {
1046         t = (low+high)/2;
1047         if (rp[t] > bcol) high = t;
1048         else              low  = t;
1049       }
1050       for (i=low; i<high; i++) {
1051         if (rp[i] > bcol) break;
1052         if (rp[i] == bcol) {
1053           bap  = ap +  bs2*i + bs*cidx + ridx;
1054           if (is == ADD_VALUES) *bap += value;
1055           else                  *bap  = value;
1056           goto noinsert1;
1057         }
1058       }
1059       if (nonew == 1) goto noinsert1;
1060       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1061       if (nrow >= rmax) {
1062         /* there is no extra room in row, therefore enlarge */
1063         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1064         MatScalar *new_a;
1065 
1066         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1067 
1068         /* Malloc new storage space */
1069         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1070 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1071         new_j   = (int*)(new_a + bs2*new_nz);
1072         new_i   = new_j + new_nz;
1073 
1074         /* copy over old data into new slots */
1075         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1076         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1077         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1078         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1079         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1080         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1081         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1082         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1083         /* free up old matrix storage */
1084         ierr = PetscFree(a->a);CHKERRQ(ierr);
1085         if (!a->singlemalloc) {
1086           ierr = PetscFree(a->i);CHKERRQ(ierr);
1087           ierr = PetscFree(a->j);CHKERRQ(ierr);
1088         }
1089         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1090         a->singlemalloc = PETSC_TRUE;
1091 
1092         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1093         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1094         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1095         a->maxnz += bs2*CHUNKSIZE;
1096         a->reallocs++;
1097         a->nz++;
1098       }
1099       N = nrow++ - 1;
1100       /* shift up all the later entries in this row */
1101       for (ii=N; ii>=i; ii--) {
1102         rp[ii+1] = rp[ii];
1103         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1104       }
1105       if (N>=i) {
1106         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1107       }
1108       rp[i]                      = bcol;
1109       ap[bs2*i + bs*cidx + ridx] = value;
1110       noinsert1:;
1111       low = i;
1112     }
1113     ailen[brow] = nrow;
1114   }
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 EXTERN int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,MatLUInfo*,Mat*);
1119 EXTERN int MatLUFactor_SeqBAIJ(Mat,IS,IS,MatLUInfo*);
1120 EXTERN int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1121 EXTERN int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1122 EXTERN int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1123 EXTERN int MatMultTranspose_SeqBAIJ(Mat,Vec,Vec);
1124 EXTERN int MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1125 EXTERN int MatScale_SeqBAIJ(Scalar*,Mat);
1126 EXTERN int MatNorm_SeqBAIJ(Mat,NormType,PetscReal *);
1127 EXTERN int MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*);
1128 EXTERN int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1129 EXTERN int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1130 EXTERN int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1131 EXTERN int MatZeroEntries_SeqBAIJ(Mat);
1132 
1133 EXTERN int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1134 EXTERN int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1135 EXTERN int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1136 EXTERN int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1137 EXTERN int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1138 EXTERN int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1139 EXTERN int MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
1140 EXTERN int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1141 EXTERN int MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec);
1142 EXTERN int MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec);
1143 EXTERN int MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec);
1144 EXTERN int MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec);
1145 EXTERN int MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec);
1146 EXTERN int MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec);
1147 EXTERN int MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec);
1148 
1149 EXTERN int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1150 EXTERN int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1151 EXTERN int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1152 EXTERN int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1153 EXTERN int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1154 EXTERN int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1155 EXTERN int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*);
1156 
1157 EXTERN int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1158 EXTERN int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1159 EXTERN int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1160 EXTERN int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1161 EXTERN int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1162 EXTERN int MatMult_SeqBAIJ_6(Mat,Vec,Vec);
1163 EXTERN int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1164 EXTERN int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1165 
1166 EXTERN int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1167 EXTERN int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1168 EXTERN int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1169 EXTERN int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1170 EXTERN int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1171 EXTERN int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
1172 EXTERN int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1173 EXTERN int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1174 
1175 #undef __FUNCT__
1176 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1177 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1178 {
1179   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1180   Mat         outA;
1181   int         ierr;
1182   PetscTruth  row_identity,col_identity;
1183 
1184   PetscFunctionBegin;
1185   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1186   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1187   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1188   if (!row_identity || !col_identity) {
1189     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1190   }
1191 
1192   outA          = inA;
1193   inA->factor   = FACTOR_LU;
1194 
1195   if (!a->diag) {
1196     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1197   }
1198   /*
1199       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1200       for ILU(0) factorization with natural ordering
1201   */
1202   switch (a->bs) {
1203   case 1:
1204     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1205     inA->ops->solve           = MatSolve_SeqBAIJ_1_NaturalOrdering;
1206     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering;
1207     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
1208   case 2:
1209     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1210     inA->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1211     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering;
1212     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1213     break;
1214   case 3:
1215     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1216     inA->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1217     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_3_NaturalOrdering;
1218     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1219     break;
1220   case 4:
1221 #if defined(PETSC_HAVE_SSE)
1222     {
1223       PetscTruth sse_enabled,single_prec,flg;
1224       ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr);
1225       if (sse_enabled) {
1226         inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
1227         ierr = PetscOptionsGetLogical(PETSC_NULL,"-matsolve_single_precision",&single_prec,&flg);CHKERRQ(ierr);
1228         if (flg) {
1229           if (single_prec) {
1230             a->singleprecisionsolves = PETSC_TRUE;
1231           } else {
1232             a->singleprecisionsolves = PETSC_FALSE;
1233           }
1234         }
1235         if (a->singleprecisionsolves) {
1236           inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering_SSE_Demotion;
1237         } else {
1238           inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1239         }
1240       } else {
1241         inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1242         inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1243       }
1244     }
1245 #else
1246     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1247     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1248 #endif
1249     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_4_NaturalOrdering;
1250     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1251     break;
1252   case 5:
1253     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1254     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1255     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering;
1256     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1257     break;
1258   case 6:
1259     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1260     inA->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1261     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_6_NaturalOrdering;
1262     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1263     break;
1264   case 7:
1265     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1266     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering;
1267     inA->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1268     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1269     break;
1270   default:
1271     a->row        = row;
1272     a->col        = col;
1273     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1274     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1275 
1276     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1277     ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1278     PetscLogObjectParent(inA,a->icol);
1279 
1280     if (!a->solve_work) {
1281       ierr = PetscMalloc((inA->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr);
1282       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(Scalar));
1283     }
1284   }
1285 
1286   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1287 
1288   PetscFunctionReturn(0);
1289 }
1290 #undef __FUNCT__
1291 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1292 int MatPrintHelp_SeqBAIJ(Mat A)
1293 {
1294   static PetscTruth called = PETSC_FALSE;
1295   MPI_Comm          comm = A->comm;
1296   int               ierr;
1297 
1298   PetscFunctionBegin;
1299   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1300   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1301   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 EXTERN_C_BEGIN
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1308 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1309 {
1310   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1311   int         i,nz,n;
1312 
1313   PetscFunctionBegin;
1314   nz = baij->maxnz;
1315   n  = mat->n;
1316   for (i=0; i<nz; i++) {
1317     baij->j[i] = indices[i];
1318   }
1319   baij->nz = nz;
1320   for (i=0; i<n; i++) {
1321     baij->ilen[i] = baij->imax[i];
1322   }
1323 
1324   PetscFunctionReturn(0);
1325 }
1326 EXTERN_C_END
1327 
1328 #undef __FUNCT__
1329 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1330 /*@
1331     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1332        in the matrix.
1333 
1334   Input Parameters:
1335 +  mat - the SeqBAIJ matrix
1336 -  indices - the column indices
1337 
1338   Level: advanced
1339 
1340   Notes:
1341     This can be called if you have precomputed the nonzero structure of the
1342   matrix and want to provide it to the matrix object to improve the performance
1343   of the MatSetValues() operation.
1344 
1345     You MUST have set the correct numbers of nonzeros per row in the call to
1346   MatCreateSeqBAIJ().
1347 
1348     MUST be called before any calls to MatSetValues();
1349 
1350 @*/
1351 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1352 {
1353   int ierr,(*f)(Mat,int *);
1354 
1355   PetscFunctionBegin;
1356   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1357   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr);
1358   if (f) {
1359     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1360   } else {
1361     SETERRQ(1,"Wrong type of matrix to set column indices");
1362   }
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1368 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1369 {
1370   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1371   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1372   PetscReal    atmp;
1373   Scalar       *x,zero = 0.0;
1374   MatScalar    *aa;
1375   int          ncols,brow,krow,kcol;
1376 
1377   PetscFunctionBegin;
1378   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1379   bs   = a->bs;
1380   aa   = a->a;
1381   ai   = a->i;
1382   aj   = a->j;
1383   mbs = a->mbs;
1384 
1385   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1386   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1387   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1388   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1389   for (i=0; i<mbs; i++) {
1390     ncols = ai[1] - ai[0]; ai++;
1391     brow  = bs*i;
1392     for (j=0; j<ncols; j++){
1393       /* bcol = bs*(*aj); */
1394       for (kcol=0; kcol<bs; kcol++){
1395         for (krow=0; krow<bs; krow++){
1396           atmp = PetscAbsScalar(*aa); aa++;
1397           row = brow + krow;    /* row index */
1398           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1399           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1400         }
1401       }
1402       aj++;
1403     }
1404   }
1405   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1411 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1412 {
1413   int        ierr;
1414 
1415   PetscFunctionBegin;
1416   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1422 int MatGetArray_SeqBAIJ(Mat A,Scalar **array)
1423 {
1424   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1425   PetscFunctionBegin;
1426   *array = a->a;
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 #undef __FUNCT__
1431 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1432 int MatRestoreArray_SeqBAIJ(Mat A,Scalar **array)
1433 {
1434   PetscFunctionBegin;
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 /* -------------------------------------------------------------------*/
1439 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1440        MatGetRow_SeqBAIJ,
1441        MatRestoreRow_SeqBAIJ,
1442        MatMult_SeqBAIJ_N,
1443        MatMultAdd_SeqBAIJ_N,
1444        MatMultTranspose_SeqBAIJ,
1445        MatMultTransposeAdd_SeqBAIJ,
1446        MatSolve_SeqBAIJ_N,
1447        0,
1448        0,
1449        0,
1450        MatLUFactor_SeqBAIJ,
1451        0,
1452        0,
1453        MatTranspose_SeqBAIJ,
1454        MatGetInfo_SeqBAIJ,
1455        MatEqual_SeqBAIJ,
1456        MatGetDiagonal_SeqBAIJ,
1457        MatDiagonalScale_SeqBAIJ,
1458        MatNorm_SeqBAIJ,
1459        0,
1460        MatAssemblyEnd_SeqBAIJ,
1461        0,
1462        MatSetOption_SeqBAIJ,
1463        MatZeroEntries_SeqBAIJ,
1464        MatZeroRows_SeqBAIJ,
1465        MatLUFactorSymbolic_SeqBAIJ,
1466        MatLUFactorNumeric_SeqBAIJ_N,
1467        0,
1468        0,
1469        MatSetUpPreallocation_SeqBAIJ,
1470        0,
1471        MatGetOwnershipRange_SeqBAIJ,
1472        MatILUFactorSymbolic_SeqBAIJ,
1473        0,
1474        MatGetArray_SeqBAIJ,
1475        MatRestoreArray_SeqBAIJ,
1476        MatDuplicate_SeqBAIJ,
1477        0,
1478        0,
1479        MatILUFactor_SeqBAIJ,
1480        0,
1481        0,
1482        MatGetSubMatrices_SeqBAIJ,
1483        MatIncreaseOverlap_SeqBAIJ,
1484        MatGetValues_SeqBAIJ,
1485        0,
1486        MatPrintHelp_SeqBAIJ,
1487        MatScale_SeqBAIJ,
1488        0,
1489        0,
1490        0,
1491        MatGetBlockSize_SeqBAIJ,
1492        MatGetRowIJ_SeqBAIJ,
1493        MatRestoreRowIJ_SeqBAIJ,
1494        0,
1495        0,
1496        0,
1497        0,
1498        0,
1499        0,
1500        MatSetValuesBlocked_SeqBAIJ,
1501        MatGetSubMatrix_SeqBAIJ,
1502        MatDestroy_SeqBAIJ,
1503        MatView_SeqBAIJ,
1504        MatGetMaps_Petsc,
1505        0,
1506        0,
1507        0,
1508        0,
1509        0,
1510        0,
1511        MatGetRowMax_SeqBAIJ,
1512        MatConvert_Basic};
1513 
1514 EXTERN_C_BEGIN
1515 #undef __FUNCT__
1516 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1517 int MatStoreValues_SeqBAIJ(Mat mat)
1518 {
1519   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1520   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1521   int         ierr;
1522 
1523   PetscFunctionBegin;
1524   if (aij->nonew != 1) {
1525     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1526   }
1527 
1528   /* allocate space for values if not already there */
1529   if (!aij->saved_values) {
1530     ierr = PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr);
1531   }
1532 
1533   /* copy values over */
1534   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1535   PetscFunctionReturn(0);
1536 }
1537 EXTERN_C_END
1538 
1539 EXTERN_C_BEGIN
1540 #undef __FUNCT__
1541 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1542 int MatRetrieveValues_SeqBAIJ(Mat mat)
1543 {
1544   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1545   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1546 
1547   PetscFunctionBegin;
1548   if (aij->nonew != 1) {
1549     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1550   }
1551   if (!aij->saved_values) {
1552     SETERRQ(1,"Must call MatStoreValues(A);first");
1553   }
1554 
1555   /* copy values over */
1556   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1557   PetscFunctionReturn(0);
1558 }
1559 EXTERN_C_END
1560 
1561 EXTERN_C_BEGIN
1562 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1563 EXTERN_C_END
1564 
1565 EXTERN_C_BEGIN
1566 #undef __FUNCT__
1567 #define __FUNCT__ "MatCreate_SeqBAIJ"
1568 int MatCreate_SeqBAIJ(Mat B)
1569 {
1570   int         ierr,size;
1571   Mat_SeqBAIJ *b;
1572 
1573   PetscFunctionBegin;
1574   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1575   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1576 
1577   B->m = B->M = PetscMax(B->m,B->M);
1578   B->n = B->N = PetscMax(B->n,B->N);
1579   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1580   B->data = (void*)b;
1581   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1582   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1583   B->factor           = 0;
1584   B->lupivotthreshold = 1.0;
1585   B->mapping          = 0;
1586   b->row              = 0;
1587   b->col              = 0;
1588   b->icol             = 0;
1589   b->reallocs         = 0;
1590   b->saved_values     = 0;
1591 #if defined(PETSC_USE_MAT_SINGLE)
1592   b->setvalueslen     = 0;
1593   b->setvaluescopy    = PETSC_NULL;
1594 #endif
1595 
1596   ierr = MapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1597   ierr = MapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1598 
1599   b->sorted           = PETSC_FALSE;
1600   b->roworiented      = PETSC_TRUE;
1601   b->nonew            = 0;
1602   b->diag             = 0;
1603   b->solve_work       = 0;
1604   b->mult_work        = 0;
1605   b->spptr            = 0;
1606   B->info.nz_unneeded = (PetscReal)b->maxnz;
1607   b->keepzeroedrows   = PETSC_FALSE;
1608 
1609   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1610                                      "MatStoreValues_SeqBAIJ",
1611                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1612   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1613                                      "MatRetrieveValues_SeqBAIJ",
1614                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1615   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1616                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1617                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1618   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1619                                      "MatConvert_SeqBAIJ_SeqAIJ",
1620                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1621   PetscFunctionReturn(0);
1622 }
1623 EXTERN_C_END
1624 
1625 #undef __FUNCT__
1626 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1627 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1628 {
1629   Mat         C;
1630   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1631   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1632 
1633   PetscFunctionBegin;
1634   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1635 
1636   *B = 0;
1637   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1638   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1639   c    = (Mat_SeqBAIJ*)C->data;
1640 
1641   c->bs         = a->bs;
1642   c->bs2        = a->bs2;
1643   c->mbs        = a->mbs;
1644   c->nbs        = a->nbs;
1645   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1646 
1647   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1648   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1649   for (i=0; i<mbs; i++) {
1650     c->imax[i] = a->imax[i];
1651     c->ilen[i] = a->ilen[i];
1652   }
1653 
1654   /* allocate the matrix space */
1655   c->singlemalloc = PETSC_TRUE;
1656   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1657   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1658   c->j = (int*)(c->a + nz*bs2);
1659   c->i = c->j + nz;
1660   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1661   if (mbs > 0) {
1662     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1663     if (cpvalues == MAT_COPY_VALUES) {
1664       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1665     } else {
1666       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1667     }
1668   }
1669 
1670   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1671   c->sorted      = a->sorted;
1672   c->roworiented = a->roworiented;
1673   c->nonew       = a->nonew;
1674 
1675   if (a->diag) {
1676     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1677     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1678     for (i=0; i<mbs; i++) {
1679       c->diag[i] = a->diag[i];
1680     }
1681   } else c->diag        = 0;
1682   c->nz                 = a->nz;
1683   c->maxnz              = a->maxnz;
1684   c->solve_work         = 0;
1685   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1686   c->mult_work          = 0;
1687   C->preallocated       = PETSC_TRUE;
1688   C->assembled          = PETSC_TRUE;
1689   *B = C;
1690   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1691   PetscFunctionReturn(0);
1692 }
1693 
1694 EXTERN_C_BEGIN
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatLoad_SeqBAIJ"
1697 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1698 {
1699   Mat_SeqBAIJ  *a;
1700   Mat          B;
1701   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1702   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1703   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1704   int          *masked,nmask,tmp,bs2,ishift;
1705   Scalar       *aa;
1706   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1707 
1708   PetscFunctionBegin;
1709   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1710   bs2  = bs*bs;
1711 
1712   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1713   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1714   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1715   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1716   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1717   M = header[1]; N = header[2]; nz = header[3];
1718 
1719   if (header[3] < 0) {
1720     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1721   }
1722 
1723   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1724 
1725   /*
1726      This code adds extra rows to make sure the number of rows is
1727     divisible by the blocksize
1728   */
1729   mbs        = M/bs;
1730   extra_rows = bs - M + bs*(mbs);
1731   if (extra_rows == bs) extra_rows = 0;
1732   else                  mbs++;
1733   if (extra_rows) {
1734     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1735   }
1736 
1737   /* read in row lengths */
1738   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1739   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1740   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1741 
1742   /* read in column indices */
1743   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1744   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1745   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1746 
1747   /* loop over row lengths determining block row lengths */
1748   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1749   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1750   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1751   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1752   masked   = mask + mbs;
1753   rowcount = 0; nzcount = 0;
1754   for (i=0; i<mbs; i++) {
1755     nmask = 0;
1756     for (j=0; j<bs; j++) {
1757       kmax = rowlengths[rowcount];
1758       for (k=0; k<kmax; k++) {
1759         tmp = jj[nzcount++]/bs;
1760         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1761       }
1762       rowcount++;
1763     }
1764     browlengths[i] += nmask;
1765     /* zero out the mask elements we set */
1766     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1767   }
1768 
1769   /* create our matrix */
1770   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1771   B = *A;
1772   a = (Mat_SeqBAIJ*)B->data;
1773 
1774   /* set matrix "i" values */
1775   a->i[0] = 0;
1776   for (i=1; i<= mbs; i++) {
1777     a->i[i]      = a->i[i-1] + browlengths[i-1];
1778     a->ilen[i-1] = browlengths[i-1];
1779   }
1780   a->nz         = 0;
1781   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1782 
1783   /* read in nonzero values */
1784   ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr);
1785   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1786   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1787 
1788   /* set "a" and "j" values into matrix */
1789   nzcount = 0; jcount = 0;
1790   for (i=0; i<mbs; i++) {
1791     nzcountb = nzcount;
1792     nmask    = 0;
1793     for (j=0; j<bs; j++) {
1794       kmax = rowlengths[i*bs+j];
1795       for (k=0; k<kmax; k++) {
1796         tmp = jj[nzcount++]/bs;
1797 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1798       }
1799     }
1800     /* sort the masked values */
1801     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1802 
1803     /* set "j" values into matrix */
1804     maskcount = 1;
1805     for (j=0; j<nmask; j++) {
1806       a->j[jcount++]  = masked[j];
1807       mask[masked[j]] = maskcount++;
1808     }
1809     /* set "a" values into matrix */
1810     ishift = bs2*a->i[i];
1811     for (j=0; j<bs; j++) {
1812       kmax = rowlengths[i*bs+j];
1813       for (k=0; k<kmax; k++) {
1814         tmp       = jj[nzcountb]/bs ;
1815         block     = mask[tmp] - 1;
1816         point     = jj[nzcountb] - bs*tmp;
1817         idx       = ishift + bs2*block + j + bs*point;
1818         a->a[idx] = (MatScalar)aa[nzcountb++];
1819       }
1820     }
1821     /* zero out the mask elements we set */
1822     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1823   }
1824   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1825 
1826   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1827   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1828   ierr = PetscFree(aa);CHKERRQ(ierr);
1829   ierr = PetscFree(jj);CHKERRQ(ierr);
1830   ierr = PetscFree(mask);CHKERRQ(ierr);
1831 
1832   B->assembled = PETSC_TRUE;
1833 
1834   ierr = MatView_Private(B);CHKERRQ(ierr);
1835   PetscFunctionReturn(0);
1836 }
1837 EXTERN_C_END
1838 
1839 #undef __FUNCT__
1840 #define __FUNCT__ "MatCreateSeqBAIJ"
1841 /*@C
1842    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1843    compressed row) format.  For good matrix assembly performance the
1844    user should preallocate the matrix storage by setting the parameter nz
1845    (or the array nnz).  By setting these parameters accurately, performance
1846    during matrix assembly can be increased by more than a factor of 50.
1847 
1848    Collective on MPI_Comm
1849 
1850    Input Parameters:
1851 +  comm - MPI communicator, set to PETSC_COMM_SELF
1852 .  bs - size of block
1853 .  m - number of rows
1854 .  n - number of columns
1855 .  nz - number of nonzero blocks  per block row (same for all rows)
1856 -  nnz - array containing the number of nonzero blocks in the various block rows
1857          (possibly different for each block row) or PETSC_NULL
1858 
1859    Output Parameter:
1860 .  A - the matrix
1861 
1862    Options Database Keys:
1863 .   -mat_no_unroll - uses code that does not unroll the loops in the
1864                      block calculations (much slower)
1865 .    -mat_block_size - size of the blocks to use
1866 
1867    Level: intermediate
1868 
1869    Notes:
1870    A nonzero block is any block that as 1 or more nonzeros in it
1871 
1872    The block AIJ format is fully compatible with standard Fortran 77
1873    storage.  That is, the stored row and column indices can begin at
1874    either one (as in Fortran) or zero.  See the users' manual for details.
1875 
1876    Specify the preallocated storage with either nz or nnz (not both).
1877    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1878    allocation.  For additional details, see the users manual chapter on
1879    matrices.
1880 
1881 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1882 @*/
1883 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1884 {
1885   int ierr;
1886 
1887   PetscFunctionBegin;
1888   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1889   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
1890   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
1896 /*@C
1897    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1898    per row in the matrix. For good matrix assembly performance the
1899    user should preallocate the matrix storage by setting the parameter nz
1900    (or the array nnz).  By setting these parameters accurately, performance
1901    during matrix assembly can be increased by more than a factor of 50.
1902 
1903    Collective on MPI_Comm
1904 
1905    Input Parameters:
1906 +  A - the matrix
1907 .  bs - size of block
1908 .  nz - number of block nonzeros per block row (same for all rows)
1909 -  nnz - array containing the number of block nonzeros in the various block rows
1910          (possibly different for each block row) or PETSC_NULL
1911 
1912    Options Database Keys:
1913 .   -mat_no_unroll - uses code that does not unroll the loops in the
1914                      block calculations (much slower)
1915 .    -mat_block_size - size of the blocks to use
1916 
1917    Level: intermediate
1918 
1919    Notes:
1920    The block AIJ format is fully compatible with standard Fortran 77
1921    storage.  That is, the stored row and column indices can begin at
1922    either one (as in Fortran) or zero.  See the users' manual for details.
1923 
1924    Specify the preallocated storage with either nz or nnz (not both).
1925    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1926    allocation.  For additional details, see the users manual chapter on
1927    matrices.
1928 
1929 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1930 @*/
1931 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1932 {
1933   Mat_SeqBAIJ *b;
1934   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1935   PetscTruth  flg;
1936 
1937   PetscFunctionBegin;
1938   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr);
1939   if (!flg) PetscFunctionReturn(0);
1940 
1941   B->preallocated = PETSC_TRUE;
1942   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1943   if (nnz && newbs != bs) {
1944     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1945   }
1946   bs   = newbs;
1947 
1948   mbs  = B->m/bs;
1949   nbs  = B->n/bs;
1950   bs2  = bs*bs;
1951 
1952   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1953     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1954   }
1955 
1956   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1957   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1958   if (nnz) {
1959     for (i=0; i<mbs; i++) {
1960       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1961       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1962     }
1963   }
1964 
1965   b       = (Mat_SeqBAIJ*)B->data;
1966   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1967   if (!flg) {
1968     switch (bs) {
1969     case 1:
1970       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1971       B->ops->solve           = MatSolve_SeqBAIJ_1;
1972       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_1;
1973       B->ops->mult            = MatMult_SeqBAIJ_1;
1974       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1975       break;
1976     case 2:
1977       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1978       B->ops->solve           = MatSolve_SeqBAIJ_2;
1979       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_2;
1980       B->ops->mult            = MatMult_SeqBAIJ_2;
1981       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1982       break;
1983     case 3:
1984       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1985       B->ops->solve           = MatSolve_SeqBAIJ_3;
1986       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_3;
1987       B->ops->mult            = MatMult_SeqBAIJ_3;
1988       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1989       break;
1990     case 4:
1991       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1992       B->ops->solve           = MatSolve_SeqBAIJ_4;
1993       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_4;
1994       B->ops->mult            = MatMult_SeqBAIJ_4;
1995       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1996       break;
1997     case 5:
1998       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1999       B->ops->solve           = MatSolve_SeqBAIJ_5;
2000       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_5;
2001       B->ops->mult            = MatMult_SeqBAIJ_5;
2002       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2003       break;
2004     case 6:
2005       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2006       B->ops->solve           = MatSolve_SeqBAIJ_6;
2007       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_6;
2008       B->ops->mult            = MatMult_SeqBAIJ_6;
2009       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2010       break;
2011     case 7:
2012       B->ops->mult            = MatMult_SeqBAIJ_7;
2013       B->ops->solve           = MatSolve_SeqBAIJ_7;
2014       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_7;
2015       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2016       break;
2017     default:
2018       B->ops->mult            = MatMult_SeqBAIJ_N;
2019       B->ops->solve           = MatSolve_SeqBAIJ_N;
2020       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2021       break;
2022     }
2023   }
2024   b->bs      = bs;
2025   b->mbs     = mbs;
2026   b->nbs     = nbs;
2027   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
2028   if (!nnz) {
2029     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2030     else if (nz <= 0)        nz = 1;
2031     for (i=0; i<mbs; i++) b->imax[i] = nz;
2032     nz = nz*mbs;
2033   } else {
2034     nz = 0;
2035     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2036   }
2037 
2038   /* allocate the matrix space */
2039   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
2040   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2041   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2042   b->j  = (int*)(b->a + nz*bs2);
2043   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
2044   b->i  = b->j + nz;
2045   b->singlemalloc = PETSC_TRUE;
2046 
2047   b->i[0] = 0;
2048   for (i=1; i<mbs+1; i++) {
2049     b->i[i] = b->i[i-1] + b->imax[i-1];
2050   }
2051 
2052   /* b->ilen will count nonzeros in each block row so far. */
2053   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
2054   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2055   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2056 
2057   b->bs               = bs;
2058   b->bs2              = bs2;
2059   b->mbs              = mbs;
2060   b->nz               = 0;
2061   b->maxnz            = nz*bs2;
2062   B->info.nz_unneeded = (PetscReal)b->maxnz;
2063   PetscFunctionReturn(0);
2064 }
2065 
2066