xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 4f1ab113834a080a3aec87dd5ed4279d2c883c90)
1 /*$Id: baij.c,v 1.230 2001/06/22 19:05:58 buschelm Exp buschelm $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "petscsys.h"                     /*I "petscmat.h" I*/
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 
12 /*  UGLY, ugly, ugly
13    When MatScalar == Scalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
14    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
15    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
16    converts the entries into single precision and then calls ..._MatScalar() to put them
17    into the single precision data structures.
18 */
19 #if defined(PETSC_USE_MAT_SINGLE)
20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
21 #else
22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
23 #endif
24 
25 #define CHUNKSIZE  10
26 
27 /*
28      Checks for missing diagonals
29 */
30 #undef __FUNCT__
31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
32 int MatMissingDiagonal_SeqBAIJ(Mat A)
33 {
34   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
35   int         *diag,*jj = a->j,i,ierr;
36 
37   PetscFunctionBegin;
38   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
39   diag = a->diag;
40   for (i=0; i<a->mbs; i++) {
41     if (jj[diag[i]] != i) {
42       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
43     }
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
50 int MatMarkDiagonal_SeqBAIJ(Mat A)
51 {
52   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
53   int         i,j,*diag,m = a->mbs,ierr;
54 
55   PetscFunctionBegin;
56   if (a->diag) PetscFunctionReturn(0);
57 
58   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
59   PetscLogObjectMemory(A,(m+1)*sizeof(int));
60   for (i=0; i<m; i++) {
61     diag[i] = a->i[i+1];
62     for (j=a->i[i]; j<a->i[i+1]; j++) {
63       if (a->j[j] == i) {
64         diag[i] = j;
65         break;
66       }
67     }
68   }
69   a->diag = diag;
70   PetscFunctionReturn(0);
71 }
72 
73 
74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
79 {
80   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
81   int         ierr,n = a->mbs,i;
82 
83   PetscFunctionBegin;
84   *nn = n;
85   if (!ia) PetscFunctionReturn(0);
86   if (symmetric) {
87     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
88   } else if (oshift == 1) {
89     /* temporarily add 1 to i and j indices */
90     int nz = a->i[n];
91     for (i=0; i<nz; i++) a->j[i]++;
92     for (i=0; i<n+1; i++) a->i[i]++;
93     *ia = a->i; *ja = a->j;
94   } else {
95     *ia = a->i; *ja = a->j;
96   }
97 
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
104 {
105   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
106   int         i,n = a->mbs,ierr;
107 
108   PetscFunctionBegin;
109   if (!ia) PetscFunctionReturn(0);
110   if (symmetric) {
111     ierr = PetscFree(*ia);CHKERRQ(ierr);
112     ierr = PetscFree(*ja);CHKERRQ(ierr);
113   } else if (oshift == 1) {
114     int nz = a->i[n]-1;
115     for (i=0; i<nz; i++) a->j[i]--;
116     for (i=0; i<n+1; i++) a->i[i]--;
117   }
118   PetscFunctionReturn(0);
119 }
120 
121 #undef __FUNCT__
122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
124 {
125   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
126 
127   PetscFunctionBegin;
128   *bs = baij->bs;
129   PetscFunctionReturn(0);
130 }
131 
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatDestroy_SeqBAIJ"
135 int MatDestroy_SeqBAIJ(Mat A)
136 {
137   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
138   int         ierr;
139 
140   PetscFunctionBegin;
141 #if defined(PETSC_USE_LOG)
142   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
143 #endif
144   ierr = PetscFree(a->a);CHKERRQ(ierr);
145   if (!a->singlemalloc) {
146     ierr = PetscFree(a->i);CHKERRQ(ierr);
147     ierr = PetscFree(a->j);CHKERRQ(ierr);
148   }
149   if (a->row) {
150     ierr = ISDestroy(a->row);CHKERRQ(ierr);
151   }
152   if (a->col) {
153     ierr = ISDestroy(a->col);CHKERRQ(ierr);
154   }
155   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
156   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
157   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
158   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
159   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
160   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
161   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
162 #if defined(PETSC_USE_MAT_SINGLE)
163   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
164 #endif
165   ierr = PetscFree(a);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatSetOption_SeqBAIJ"
171 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
172 {
173   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
174 
175   PetscFunctionBegin;
176   switch (op) {
177   case MAT_ROW_ORIENTED:
178     a->roworiented    = PETSC_TRUE;
179     break;
180   case MAT_COLUMN_ORIENTED:
181     a->roworiented    = PETSC_FALSE;
182     break;
183   case MAT_COLUMNS_SORTED:
184     a->sorted         = PETSC_TRUE;
185     break;
186   case MAT_COLUMNS_UNSORTED:
187     a->sorted         = PETSC_FALSE;
188     break;
189   case MAT_KEEP_ZEROED_ROWS:
190     a->keepzeroedrows = PETSC_TRUE;
191     break;
192   case MAT_NO_NEW_NONZERO_LOCATIONS:
193     a->nonew          = 1;
194     break;
195   case MAT_NEW_NONZERO_LOCATION_ERR:
196     a->nonew          = -1;
197     break;
198   case MAT_NEW_NONZERO_ALLOCATION_ERR:
199     a->nonew          = -2;
200     break;
201   case MAT_YES_NEW_NONZERO_LOCATIONS:
202     a->nonew          = 0;
203     break;
204   case MAT_ROWS_SORTED:
205   case MAT_ROWS_UNSORTED:
206   case MAT_YES_NEW_DIAGONALS:
207   case MAT_IGNORE_OFF_PROC_ENTRIES:
208   case MAT_USE_HASH_TABLE:
209     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
210     break;
211   case MAT_NO_NEW_DIAGONALS:
212     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
213     break;
214   case MAT_USE_SINGLE_PRECISION_SOLVES:
215 #if defined(PETSC_USE_MAT_SINGLE)
216     if (a->bs==4) {
217       PetscTruth sse_enabled;
218       int ierr;
219       ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr);
220       if (sse_enabled) {
221         a->singleprecisionsolves = PETSC_TRUE;
222       } else {
223         PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
224       }
225     } else {
226       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
227     }
228 #else
229     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
230 #endif
231     break;
232   default:
233     SETERRQ(PETSC_ERR_SUP,"unknown option");
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ"
240 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
241 {
242   PetscFunctionBegin;
243   if (m) *m = 0;
244   if (n) *n = A->m;
245   PetscFunctionReturn(0);
246 }
247 
248 #undef __FUNCT__
249 #define __FUNCT__ "MatGetRow_SeqBAIJ"
250 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
251 {
252   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
253   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
254   MatScalar    *aa,*aa_i;
255   Scalar       *v_i;
256 
257   PetscFunctionBegin;
258   bs  = a->bs;
259   ai  = a->i;
260   aj  = a->j;
261   aa  = a->a;
262   bs2 = a->bs2;
263 
264   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
265 
266   bn  = row/bs;   /* Block number */
267   bp  = row % bs; /* Block Position */
268   M   = ai[bn+1] - ai[bn];
269   *nz = bs*M;
270 
271   if (v) {
272     *v = 0;
273     if (*nz) {
274       ierr = PetscMalloc((*nz)*sizeof(Scalar),v);CHKERRQ(ierr);
275       for (i=0; i<M; i++) { /* for each block in the block row */
276         v_i  = *v + i*bs;
277         aa_i = aa + bs2*(ai[bn] + i);
278         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
279       }
280     }
281   }
282 
283   if (idx) {
284     *idx = 0;
285     if (*nz) {
286       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
287       for (i=0; i<M; i++) { /* for each block in the block row */
288         idx_i = *idx + i*bs;
289         itmp  = bs*aj[ai[bn] + i];
290         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
291       }
292     }
293   }
294   PetscFunctionReturn(0);
295 }
296 
297 #undef __FUNCT__
298 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
299 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
300 {
301   int ierr;
302 
303   PetscFunctionBegin;
304   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
305   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatTranspose_SeqBAIJ"
311 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
312 {
313   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
314   Mat         C;
315   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
316   int         *rows,*cols,bs2=a->bs2;
317   Scalar      *array;
318 
319   PetscFunctionBegin;
320   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
321   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
322   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
323 
324 #if defined(PETSC_USE_MAT_SINGLE)
325   ierr = PetscMalloc(a->bs2*a->nz*sizeof(Scalar),&array);CHKERRQ(ierr);
326   for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i];
327 #else
328   array = a->a;
329 #endif
330 
331   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
332   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
333   ierr = PetscFree(col);CHKERRQ(ierr);
334   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
335   cols = rows + bs;
336   for (i=0; i<mbs; i++) {
337     cols[0] = i*bs;
338     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
339     len = ai[i+1] - ai[i];
340     for (j=0; j<len; j++) {
341       rows[0] = (*aj++)*bs;
342       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
343       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
344       array += bs2;
345     }
346   }
347   ierr = PetscFree(rows);CHKERRQ(ierr);
348 #if defined(PETSC_USE_MAT_SINGLE)
349   ierr = PetscFree(array);
350 #endif
351 
352   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
353   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
354 
355   if (B) {
356     *B = C;
357   } else {
358     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
359   }
360   PetscFunctionReturn(0);
361 }
362 
363 #undef __FUNCT__
364 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
365 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
366 {
367   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
368   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
369   Scalar      *aa;
370   FILE        *file;
371 
372   PetscFunctionBegin;
373   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
374   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
375   col_lens[0] = MAT_COOKIE;
376 
377   col_lens[1] = A->m;
378   col_lens[2] = A->n;
379   col_lens[3] = a->nz*bs2;
380 
381   /* store lengths of each row and write (including header) to file */
382   count = 0;
383   for (i=0; i<a->mbs; i++) {
384     for (j=0; j<bs; j++) {
385       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
386     }
387   }
388   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
389   ierr = PetscFree(col_lens);CHKERRQ(ierr);
390 
391   /* store column indices (zero start index) */
392   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
393   count = 0;
394   for (i=0; i<a->mbs; i++) {
395     for (j=0; j<bs; j++) {
396       for (k=a->i[i]; k<a->i[i+1]; k++) {
397         for (l=0; l<bs; l++) {
398           jj[count++] = bs*a->j[k] + l;
399         }
400       }
401     }
402   }
403   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
404   ierr = PetscFree(jj);CHKERRQ(ierr);
405 
406   /* store nonzero values */
407   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr);
408   count = 0;
409   for (i=0; i<a->mbs; i++) {
410     for (j=0; j<bs; j++) {
411       for (k=a->i[i]; k<a->i[i+1]; k++) {
412         for (l=0; l<bs; l++) {
413           aa[count++] = a->a[bs2*k + l*bs + j];
414         }
415       }
416     }
417   }
418   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
419   ierr = PetscFree(aa);CHKERRQ(ierr);
420 
421   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
422   if (file) {
423     fprintf(file,"-matload_block_size %d\n",a->bs);
424   }
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
430 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
431 {
432   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
433   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
434   PetscViewerFormat format;
435 
436   PetscFunctionBegin;
437   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
438   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
439     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
440   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
441     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
442   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
443     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
444     for (i=0; i<a->mbs; i++) {
445       for (j=0; j<bs; j++) {
446         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
447         for (k=a->i[i]; k<a->i[i+1]; k++) {
448           for (l=0; l<bs; l++) {
449 #if defined(PETSC_USE_COMPLEX)
450             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
451               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l,
452                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
453             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
454               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l,
455                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
456             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
457               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
458             }
459 #else
460             if (a->a[bs2*k + l*bs + j] != 0.0) {
461               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
462             }
463 #endif
464           }
465         }
466         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
467       }
468     }
469     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
470   } else {
471     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
472     for (i=0; i<a->mbs; i++) {
473       for (j=0; j<bs; j++) {
474         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
475         for (k=a->i[i]; k<a->i[i+1]; k++) {
476           for (l=0; l<bs; l++) {
477 #if defined(PETSC_USE_COMPLEX)
478             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
479               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
480                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
481             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
482               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
483                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
484             } else {
485               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
486             }
487 #else
488             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
489 #endif
490           }
491         }
492         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
493       }
494     }
495     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
496   }
497   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
498   PetscFunctionReturn(0);
499 }
500 
501 #undef __FUNCT__
502 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
503 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
504 {
505   Mat          A = (Mat) Aa;
506   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
507   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
508   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
509   MatScalar    *aa;
510   PetscViewer  viewer;
511 
512   PetscFunctionBegin;
513 
514   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
515   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
516 
517   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
518 
519   /* loop over matrix elements drawing boxes */
520   color = PETSC_DRAW_BLUE;
521   for (i=0,row=0; i<mbs; i++,row+=bs) {
522     for (j=a->i[i]; j<a->i[i+1]; j++) {
523       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
524       x_l = a->j[j]*bs; x_r = x_l + 1.0;
525       aa = a->a + j*bs2;
526       for (k=0; k<bs; k++) {
527         for (l=0; l<bs; l++) {
528           if (PetscRealPart(*aa++) >=  0.) continue;
529           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
530         }
531       }
532     }
533   }
534   color = PETSC_DRAW_CYAN;
535   for (i=0,row=0; i<mbs; i++,row+=bs) {
536     for (j=a->i[i]; j<a->i[i+1]; j++) {
537       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
538       x_l = a->j[j]*bs; x_r = x_l + 1.0;
539       aa = a->a + j*bs2;
540       for (k=0; k<bs; k++) {
541         for (l=0; l<bs; l++) {
542           if (PetscRealPart(*aa++) != 0.) continue;
543           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
544         }
545       }
546     }
547   }
548 
549   color = PETSC_DRAW_RED;
550   for (i=0,row=0; i<mbs; i++,row+=bs) {
551     for (j=a->i[i]; j<a->i[i+1]; j++) {
552       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
553       x_l = a->j[j]*bs; x_r = x_l + 1.0;
554       aa = a->a + j*bs2;
555       for (k=0; k<bs; k++) {
556         for (l=0; l<bs; l++) {
557           if (PetscRealPart(*aa++) <= 0.) continue;
558           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
559         }
560       }
561     }
562   }
563   PetscFunctionReturn(0);
564 }
565 
566 #undef __FUNCT__
567 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
568 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
569 {
570   int          ierr;
571   PetscReal    xl,yl,xr,yr,w,h;
572   PetscDraw    draw;
573   PetscTruth   isnull;
574 
575   PetscFunctionBegin;
576 
577   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
578   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
579 
580   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
581   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
582   xr += w;    yr += h;  xl = -w;     yl = -h;
583   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
584   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
585   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatView_SeqBAIJ"
591 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
592 {
593   int        ierr;
594   PetscTruth issocket,isascii,isbinary,isdraw;
595 
596   PetscFunctionBegin;
597   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
598   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
599   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
600   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
601   if (issocket) {
602     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
603   } else if (isascii){
604     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
605   } else if (isbinary) {
606     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
607   } else if (isdraw) {
608     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
609   } else {
610     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
611   }
612   PetscFunctionReturn(0);
613 }
614 
615 
616 #undef __FUNCT__
617 #define __FUNCT__ "MatGetValues_SeqBAIJ"
618 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
619 {
620   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
621   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
622   int        *ai = a->i,*ailen = a->ilen;
623   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
624   MatScalar  *ap,*aa = a->a,zero = 0.0;
625 
626   PetscFunctionBegin;
627   for (k=0; k<m; k++) { /* loop over rows */
628     row  = im[k]; brow = row/bs;
629     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
630     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
631     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
632     nrow = ailen[brow];
633     for (l=0; l<n; l++) { /* loop over columns */
634       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
635       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
636       col  = in[l] ;
637       bcol = col/bs;
638       cidx = col%bs;
639       ridx = row%bs;
640       high = nrow;
641       low  = 0; /* assume unsorted */
642       while (high-low > 5) {
643         t = (low+high)/2;
644         if (rp[t] > bcol) high = t;
645         else             low  = t;
646       }
647       for (i=low; i<high; i++) {
648         if (rp[i] > bcol) break;
649         if (rp[i] == bcol) {
650           *v++ = ap[bs2*i+bs*cidx+ridx];
651           goto finished;
652         }
653       }
654       *v++ = zero;
655       finished:;
656     }
657   }
658   PetscFunctionReturn(0);
659 }
660 
661 #if defined(PETSC_USE_MAT_SINGLE)
662 #undef __FUNCT__
663 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
664 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
665 {
666   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
667   int         ierr,i,N = m*n*b->bs2;
668   MatScalar   *vsingle;
669 
670   PetscFunctionBegin;
671   if (N > b->setvalueslen) {
672     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
673     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
674     b->setvalueslen  = N;
675   }
676   vsingle = b->setvaluescopy;
677   for (i=0; i<N; i++) {
678     vsingle[i] = v[i];
679   }
680   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
681   PetscFunctionReturn(0);
682 }
683 #endif
684 
685 
686 #undef __FUNCT__
687 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
688 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
689 {
690   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
691   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
692   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
693   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
694   PetscTruth  roworiented=a->roworiented;
695   MatScalar   *value = v,*ap,*aa = a->a,*bap;
696 
697   PetscFunctionBegin;
698   if (roworiented) {
699     stepval = (n-1)*bs;
700   } else {
701     stepval = (m-1)*bs;
702   }
703   for (k=0; k<m; k++) { /* loop over added rows */
704     row  = im[k];
705     if (row < 0) continue;
706 #if defined(PETSC_USE_BOPT_g)
707     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
708 #endif
709     rp   = aj + ai[row];
710     ap   = aa + bs2*ai[row];
711     rmax = imax[row];
712     nrow = ailen[row];
713     low  = 0;
714     for (l=0; l<n; l++) { /* loop over added columns */
715       if (in[l] < 0) continue;
716 #if defined(PETSC_USE_BOPT_g)
717       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
718 #endif
719       col = in[l];
720       if (roworiented) {
721         value = v + k*(stepval+bs)*bs + l*bs;
722       } else {
723         value = v + l*(stepval+bs)*bs + k*bs;
724       }
725       if (!sorted) low = 0; high = nrow;
726       while (high-low > 7) {
727         t = (low+high)/2;
728         if (rp[t] > col) high = t;
729         else             low  = t;
730       }
731       for (i=low; i<high; i++) {
732         if (rp[i] > col) break;
733         if (rp[i] == col) {
734           bap  = ap +  bs2*i;
735           if (roworiented) {
736             if (is == ADD_VALUES) {
737               for (ii=0; ii<bs; ii++,value+=stepval) {
738                 for (jj=ii; jj<bs2; jj+=bs) {
739                   bap[jj] += *value++;
740                 }
741               }
742             } else {
743               for (ii=0; ii<bs; ii++,value+=stepval) {
744                 for (jj=ii; jj<bs2; jj+=bs) {
745                   bap[jj] = *value++;
746                 }
747               }
748             }
749           } else {
750             if (is == ADD_VALUES) {
751               for (ii=0; ii<bs; ii++,value+=stepval) {
752                 for (jj=0; jj<bs; jj++) {
753                   *bap++ += *value++;
754                 }
755               }
756             } else {
757               for (ii=0; ii<bs; ii++,value+=stepval) {
758                 for (jj=0; jj<bs; jj++) {
759                   *bap++  = *value++;
760                 }
761               }
762             }
763           }
764           goto noinsert2;
765         }
766       }
767       if (nonew == 1) goto noinsert2;
768       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
769       if (nrow >= rmax) {
770         /* there is no extra room in row, therefore enlarge */
771         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
772         MatScalar *new_a;
773 
774         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
775 
776         /* malloc new storage space */
777         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
778 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
779         new_j   = (int*)(new_a + bs2*new_nz);
780         new_i   = new_j + new_nz;
781 
782         /* copy over old data into new slots */
783         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
784         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
785         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
786         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
787         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
788         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
789         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
790         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
791         /* free up old matrix storage */
792         ierr = PetscFree(a->a);CHKERRQ(ierr);
793         if (!a->singlemalloc) {
794           ierr = PetscFree(a->i);CHKERRQ(ierr);
795           ierr = PetscFree(a->j);CHKERRQ(ierr);
796         }
797         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
798         a->singlemalloc = PETSC_TRUE;
799 
800         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
801         rmax = imax[row] = imax[row] + CHUNKSIZE;
802         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
803         a->maxnz += bs2*CHUNKSIZE;
804         a->reallocs++;
805         a->nz++;
806       }
807       N = nrow++ - 1;
808       /* shift up all the later entries in this row */
809       for (ii=N; ii>=i; ii--) {
810         rp[ii+1] = rp[ii];
811         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
812       }
813       if (N >= i) {
814         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
815       }
816       rp[i] = col;
817       bap   = ap +  bs2*i;
818       if (roworiented) {
819         for (ii=0; ii<bs; ii++,value+=stepval) {
820           for (jj=ii; jj<bs2; jj+=bs) {
821             bap[jj] = *value++;
822           }
823         }
824       } else {
825         for (ii=0; ii<bs; ii++,value+=stepval) {
826           for (jj=0; jj<bs; jj++) {
827             *bap++  = *value++;
828           }
829         }
830       }
831       noinsert2:;
832       low = i;
833     }
834     ailen[row] = nrow;
835   }
836   PetscFunctionReturn(0);
837 }
838 
839 
840 #undef __FUNCT__
841 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
842 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
843 {
844   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
845   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
846   int        m = A->m,*ip,N,*ailen = a->ilen;
847   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
848   MatScalar  *aa = a->a,*ap;
849 
850   PetscFunctionBegin;
851   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
852 
853   if (m) rmax = ailen[0];
854   for (i=1; i<mbs; i++) {
855     /* move each row back by the amount of empty slots (fshift) before it*/
856     fshift += imax[i-1] - ailen[i-1];
857     rmax   = PetscMax(rmax,ailen[i]);
858     if (fshift) {
859       ip = aj + ai[i]; ap = aa + bs2*ai[i];
860       N = ailen[i];
861       for (j=0; j<N; j++) {
862         ip[j-fshift] = ip[j];
863         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
864       }
865     }
866     ai[i] = ai[i-1] + ailen[i-1];
867   }
868   if (mbs) {
869     fshift += imax[mbs-1] - ailen[mbs-1];
870     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
871   }
872   /* reset ilen and imax for each row */
873   for (i=0; i<mbs; i++) {
874     ailen[i] = imax[i] = ai[i+1] - ai[i];
875   }
876   a->nz = ai[mbs];
877 
878   /* diagonals may have moved, so kill the diagonal pointers */
879   if (fshift && a->diag) {
880     ierr = PetscFree(a->diag);CHKERRQ(ierr);
881     PetscLogObjectMemory(A,-(m+1)*sizeof(int));
882     a->diag = 0;
883   }
884   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
885            m,A->n,a->bs,fshift*bs2,a->nz*bs2);
886   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
887            a->reallocs);
888   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
889   a->reallocs          = 0;
890   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
891 
892   PetscFunctionReturn(0);
893 }
894 
895 
896 
897 /*
898    This function returns an array of flags which indicate the locations of contiguous
899    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
900    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
901    Assume: sizes should be long enough to hold all the values.
902 */
903 #undef __FUNCT__
904 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
905 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
906 {
907   int        i,j,k,row;
908   PetscTruth flg;
909 
910   PetscFunctionBegin;
911   for (i=0,j=0; i<n; j++) {
912     row = idx[i];
913     if (row%bs!=0) { /* Not the begining of a block */
914       sizes[j] = 1;
915       i++;
916     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
917       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
918       i++;
919     } else { /* Begining of the block, so check if the complete block exists */
920       flg = PETSC_TRUE;
921       for (k=1; k<bs; k++) {
922         if (row+k != idx[i+k]) { /* break in the block */
923           flg = PETSC_FALSE;
924           break;
925         }
926       }
927       if (flg == PETSC_TRUE) { /* No break in the bs */
928         sizes[j] = bs;
929         i+= bs;
930       } else {
931         sizes[j] = 1;
932         i++;
933       }
934     }
935   }
936   *bs_max = j;
937   PetscFunctionReturn(0);
938 }
939 
940 #undef __FUNCT__
941 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
942 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag)
943 {
944   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
945   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
946   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
947   Scalar      zero = 0.0;
948   MatScalar   *aa;
949 
950   PetscFunctionBegin;
951   /* Make a copy of the IS and  sort it */
952   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
953   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
954 
955   /* allocate memory for rows,sizes */
956   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
957   sizes = rows + is_n;
958 
959   /* copy IS values to rows, and sort them */
960   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
961   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
962   if (baij->keepzeroedrows) {
963     for (i=0; i<is_n; i++) { sizes[i] = 1; }
964     bs_max = is_n;
965   } else {
966     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
967   }
968   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
969 
970   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
971     row   = rows[j];
972     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
973     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
974     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
975     if (sizes[i] == bs && !baij->keepzeroedrows) {
976       if (diag) {
977         if (baij->ilen[row/bs] > 0) {
978           baij->ilen[row/bs]       = 1;
979           baij->j[baij->i[row/bs]] = row/bs;
980           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
981         }
982         /* Now insert all the diagonal values for this bs */
983         for (k=0; k<bs; k++) {
984           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
985         }
986       } else { /* (!diag) */
987         baij->ilen[row/bs] = 0;
988       } /* end (!diag) */
989     } else { /* (sizes[i] != bs) */
990 #if defined (PETSC_USE_DEBUG)
991       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
992 #endif
993       for (k=0; k<count; k++) {
994         aa[0] =  zero;
995         aa    += bs;
996       }
997       if (diag) {
998         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
999       }
1000     }
1001   }
1002 
1003   ierr = PetscFree(rows);CHKERRQ(ierr);
1004   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 #undef __FUNCT__
1009 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1010 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
1011 {
1012   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1013   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1014   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1015   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1016   int         ridx,cidx,bs2=a->bs2,ierr;
1017   PetscTruth  roworiented=a->roworiented;
1018   MatScalar   *ap,value,*aa=a->a,*bap;
1019 
1020   PetscFunctionBegin;
1021   for (k=0; k<m; k++) { /* loop over added rows */
1022     row  = im[k]; brow = row/bs;
1023     if (row < 0) continue;
1024 #if defined(PETSC_USE_BOPT_g)
1025     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1026 #endif
1027     rp   = aj + ai[brow];
1028     ap   = aa + bs2*ai[brow];
1029     rmax = imax[brow];
1030     nrow = ailen[brow];
1031     low  = 0;
1032     for (l=0; l<n; l++) { /* loop over added columns */
1033       if (in[l] < 0) continue;
1034 #if defined(PETSC_USE_BOPT_g)
1035       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1036 #endif
1037       col = in[l]; bcol = col/bs;
1038       ridx = row % bs; cidx = col % bs;
1039       if (roworiented) {
1040         value = v[l + k*n];
1041       } else {
1042         value = v[k + l*m];
1043       }
1044       if (!sorted) low = 0; high = nrow;
1045       while (high-low > 7) {
1046         t = (low+high)/2;
1047         if (rp[t] > bcol) high = t;
1048         else              low  = t;
1049       }
1050       for (i=low; i<high; i++) {
1051         if (rp[i] > bcol) break;
1052         if (rp[i] == bcol) {
1053           bap  = ap +  bs2*i + bs*cidx + ridx;
1054           if (is == ADD_VALUES) *bap += value;
1055           else                  *bap  = value;
1056           goto noinsert1;
1057         }
1058       }
1059       if (nonew == 1) goto noinsert1;
1060       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1061       if (nrow >= rmax) {
1062         /* there is no extra room in row, therefore enlarge */
1063         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1064         MatScalar *new_a;
1065 
1066         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1067 
1068         /* Malloc new storage space */
1069         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1070 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1071         new_j   = (int*)(new_a + bs2*new_nz);
1072         new_i   = new_j + new_nz;
1073 
1074         /* copy over old data into new slots */
1075         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1076         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1077         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1078         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1079         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1080         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1081         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1082         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1083         /* free up old matrix storage */
1084         ierr = PetscFree(a->a);CHKERRQ(ierr);
1085         if (!a->singlemalloc) {
1086           ierr = PetscFree(a->i);CHKERRQ(ierr);
1087           ierr = PetscFree(a->j);CHKERRQ(ierr);
1088         }
1089         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1090         a->singlemalloc = PETSC_TRUE;
1091 
1092         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1093         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1094         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1095         a->maxnz += bs2*CHUNKSIZE;
1096         a->reallocs++;
1097         a->nz++;
1098       }
1099       N = nrow++ - 1;
1100       /* shift up all the later entries in this row */
1101       for (ii=N; ii>=i; ii--) {
1102         rp[ii+1] = rp[ii];
1103         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1104       }
1105       if (N>=i) {
1106         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1107       }
1108       rp[i]                      = bcol;
1109       ap[bs2*i + bs*cidx + ridx] = value;
1110       noinsert1:;
1111       low = i;
1112     }
1113     ailen[brow] = nrow;
1114   }
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 EXTERN int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,MatLUInfo*,Mat*);
1119 EXTERN int MatLUFactor_SeqBAIJ(Mat,IS,IS,MatLUInfo*);
1120 EXTERN int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1121 EXTERN int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1122 EXTERN int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1123 EXTERN int MatMultTranspose_SeqBAIJ(Mat,Vec,Vec);
1124 EXTERN int MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1125 EXTERN int MatScale_SeqBAIJ(Scalar*,Mat);
1126 EXTERN int MatNorm_SeqBAIJ(Mat,NormType,PetscReal *);
1127 EXTERN int MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*);
1128 EXTERN int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1129 EXTERN int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1130 EXTERN int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1131 EXTERN int MatZeroEntries_SeqBAIJ(Mat);
1132 
1133 EXTERN int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1134 EXTERN int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1135 EXTERN int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1136 EXTERN int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1137 EXTERN int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1138 EXTERN int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1139 EXTERN int MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
1140 EXTERN int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1141 EXTERN int MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec);
1142 EXTERN int MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec);
1143 EXTERN int MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec);
1144 EXTERN int MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec);
1145 EXTERN int MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec);
1146 EXTERN int MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec);
1147 EXTERN int MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec);
1148 
1149 EXTERN int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1150 EXTERN int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1151 EXTERN int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1152 EXTERN int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1153 EXTERN int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1154 EXTERN int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1155 EXTERN int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*);
1156 
1157 EXTERN int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1158 EXTERN int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1159 EXTERN int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1160 EXTERN int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1161 EXTERN int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1162 EXTERN int MatMult_SeqBAIJ_6(Mat,Vec,Vec);
1163 EXTERN int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1164 EXTERN int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1165 
1166 EXTERN int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1167 EXTERN int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1168 EXTERN int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1169 EXTERN int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1170 EXTERN int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1171 EXTERN int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
1172 EXTERN int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1173 EXTERN int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1174 
1175 #undef __FUNCT__
1176 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1177 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1178 {
1179   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1180   Mat         outA;
1181   int         ierr;
1182   PetscTruth  row_identity,col_identity;
1183 
1184   PetscFunctionBegin;
1185   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1186   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1187   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1188   if (!row_identity || !col_identity) {
1189     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1190   }
1191 
1192   outA          = inA;
1193   inA->factor   = FACTOR_LU;
1194 
1195   if (!a->diag) {
1196     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1197   }
1198   /*
1199       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1200       for ILU(0) factorization with natural ordering
1201   */
1202   switch (a->bs) {
1203   case 1:
1204     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1205     inA->ops->solve           = MatSolve_SeqBAIJ_1_NaturalOrdering;
1206     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering;
1207     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
1208   case 2:
1209     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1210     inA->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1211     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering;
1212     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1213     break;
1214   case 3:
1215     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1216     inA->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1217     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_3_NaturalOrdering;
1218     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1219     break;
1220   case 4:
1221 #if defined(PETSC_HAVE_SSE)
1222     {
1223       PetscTruth sse_enabled;
1224       ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr);
1225       if (sse_enabled) {
1226         inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
1227       } else {
1228         inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1229       }
1230     }
1231 #else
1232     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1233 #endif
1234     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1235     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_4_NaturalOrdering;
1236     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1237     break;
1238   case 5:
1239     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1240     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1241     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering;
1242     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1243     break;
1244   case 6:
1245     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1246     inA->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1247     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_6_NaturalOrdering;
1248     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1249     break;
1250   case 7:
1251     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1252     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering;
1253     inA->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1254     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1255     break;
1256   default:
1257     a->row        = row;
1258     a->col        = col;
1259     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1260     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1261 
1262     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1263     ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1264     PetscLogObjectParent(inA,a->icol);
1265 
1266     if (!a->solve_work) {
1267       ierr = PetscMalloc((inA->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr);
1268       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(Scalar));
1269     }
1270   }
1271 
1272   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1273 
1274   PetscFunctionReturn(0);
1275 }
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1278 int MatPrintHelp_SeqBAIJ(Mat A)
1279 {
1280   static PetscTruth called = PETSC_FALSE;
1281   MPI_Comm          comm = A->comm;
1282   int               ierr;
1283 
1284   PetscFunctionBegin;
1285   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1286   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1287   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1288   PetscFunctionReturn(0);
1289 }
1290 
1291 EXTERN_C_BEGIN
1292 #undef __FUNCT__
1293 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1294 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1295 {
1296   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1297   int         i,nz,n;
1298 
1299   PetscFunctionBegin;
1300   nz = baij->maxnz;
1301   n  = mat->n;
1302   for (i=0; i<nz; i++) {
1303     baij->j[i] = indices[i];
1304   }
1305   baij->nz = nz;
1306   for (i=0; i<n; i++) {
1307     baij->ilen[i] = baij->imax[i];
1308   }
1309 
1310   PetscFunctionReturn(0);
1311 }
1312 EXTERN_C_END
1313 
1314 #undef __FUNCT__
1315 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1316 /*@
1317     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1318        in the matrix.
1319 
1320   Input Parameters:
1321 +  mat - the SeqBAIJ matrix
1322 -  indices - the column indices
1323 
1324   Level: advanced
1325 
1326   Notes:
1327     This can be called if you have precomputed the nonzero structure of the
1328   matrix and want to provide it to the matrix object to improve the performance
1329   of the MatSetValues() operation.
1330 
1331     You MUST have set the correct numbers of nonzeros per row in the call to
1332   MatCreateSeqBAIJ().
1333 
1334     MUST be called before any calls to MatSetValues();
1335 
1336 @*/
1337 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1338 {
1339   int ierr,(*f)(Mat,int *);
1340 
1341   PetscFunctionBegin;
1342   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1343   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr);
1344   if (f) {
1345     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1346   } else {
1347     SETERRQ(1,"Wrong type of matrix to set column indices");
1348   }
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1354 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1355 {
1356   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1357   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1358   PetscReal    atmp;
1359   Scalar       *x,zero = 0.0;
1360   MatScalar    *aa;
1361   int          ncols,brow,krow,kcol;
1362 
1363   PetscFunctionBegin;
1364   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1365   bs   = a->bs;
1366   aa   = a->a;
1367   ai   = a->i;
1368   aj   = a->j;
1369   mbs = a->mbs;
1370 
1371   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1372   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1373   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1374   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1375   for (i=0; i<mbs; i++) {
1376     ncols = ai[1] - ai[0]; ai++;
1377     brow  = bs*i;
1378     for (j=0; j<ncols; j++){
1379       /* bcol = bs*(*aj); */
1380       for (kcol=0; kcol<bs; kcol++){
1381         for (krow=0; krow<bs; krow++){
1382           atmp = PetscAbsScalar(*aa); aa++;
1383           row = brow + krow;    /* row index */
1384           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1385           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1386         }
1387       }
1388       aj++;
1389     }
1390   }
1391   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1392   PetscFunctionReturn(0);
1393 }
1394 
1395 #undef __FUNCT__
1396 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1397 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1398 {
1399   int        ierr;
1400 
1401   PetscFunctionBegin;
1402   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 #undef __FUNCT__
1407 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1408 int MatGetArray_SeqBAIJ(Mat A,Scalar **array)
1409 {
1410   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1411   PetscFunctionBegin;
1412   *array = a->a;
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1418 int MatRestoreArray_SeqBAIJ(Mat A,Scalar **array)
1419 {
1420   PetscFunctionBegin;
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 /* -------------------------------------------------------------------*/
1425 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1426        MatGetRow_SeqBAIJ,
1427        MatRestoreRow_SeqBAIJ,
1428        MatMult_SeqBAIJ_N,
1429        MatMultAdd_SeqBAIJ_N,
1430        MatMultTranspose_SeqBAIJ,
1431        MatMultTransposeAdd_SeqBAIJ,
1432        MatSolve_SeqBAIJ_N,
1433        0,
1434        0,
1435        0,
1436        MatLUFactor_SeqBAIJ,
1437        0,
1438        0,
1439        MatTranspose_SeqBAIJ,
1440        MatGetInfo_SeqBAIJ,
1441        MatEqual_SeqBAIJ,
1442        MatGetDiagonal_SeqBAIJ,
1443        MatDiagonalScale_SeqBAIJ,
1444        MatNorm_SeqBAIJ,
1445        0,
1446        MatAssemblyEnd_SeqBAIJ,
1447        0,
1448        MatSetOption_SeqBAIJ,
1449        MatZeroEntries_SeqBAIJ,
1450        MatZeroRows_SeqBAIJ,
1451        MatLUFactorSymbolic_SeqBAIJ,
1452        MatLUFactorNumeric_SeqBAIJ_N,
1453        0,
1454        0,
1455        MatSetUpPreallocation_SeqBAIJ,
1456        0,
1457        MatGetOwnershipRange_SeqBAIJ,
1458        MatILUFactorSymbolic_SeqBAIJ,
1459        0,
1460        MatGetArray_SeqBAIJ,
1461        MatRestoreArray_SeqBAIJ,
1462        MatDuplicate_SeqBAIJ,
1463        0,
1464        0,
1465        MatILUFactor_SeqBAIJ,
1466        0,
1467        0,
1468        MatGetSubMatrices_SeqBAIJ,
1469        MatIncreaseOverlap_SeqBAIJ,
1470        MatGetValues_SeqBAIJ,
1471        0,
1472        MatPrintHelp_SeqBAIJ,
1473        MatScale_SeqBAIJ,
1474        0,
1475        0,
1476        0,
1477        MatGetBlockSize_SeqBAIJ,
1478        MatGetRowIJ_SeqBAIJ,
1479        MatRestoreRowIJ_SeqBAIJ,
1480        0,
1481        0,
1482        0,
1483        0,
1484        0,
1485        0,
1486        MatSetValuesBlocked_SeqBAIJ,
1487        MatGetSubMatrix_SeqBAIJ,
1488        MatDestroy_SeqBAIJ,
1489        MatView_SeqBAIJ,
1490        MatGetMaps_Petsc,
1491        0,
1492        0,
1493        0,
1494        0,
1495        0,
1496        0,
1497        MatGetRowMax_SeqBAIJ,
1498        MatConvert_Basic};
1499 
1500 EXTERN_C_BEGIN
1501 #undef __FUNCT__
1502 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1503 int MatStoreValues_SeqBAIJ(Mat mat)
1504 {
1505   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1506   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1507   int         ierr;
1508 
1509   PetscFunctionBegin;
1510   if (aij->nonew != 1) {
1511     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1512   }
1513 
1514   /* allocate space for values if not already there */
1515   if (!aij->saved_values) {
1516     ierr = PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr);
1517   }
1518 
1519   /* copy values over */
1520   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1521   PetscFunctionReturn(0);
1522 }
1523 EXTERN_C_END
1524 
1525 EXTERN_C_BEGIN
1526 #undef __FUNCT__
1527 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1528 int MatRetrieveValues_SeqBAIJ(Mat mat)
1529 {
1530   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1531   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1532 
1533   PetscFunctionBegin;
1534   if (aij->nonew != 1) {
1535     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1536   }
1537   if (!aij->saved_values) {
1538     SETERRQ(1,"Must call MatStoreValues(A);first");
1539   }
1540 
1541   /* copy values over */
1542   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1543   PetscFunctionReturn(0);
1544 }
1545 EXTERN_C_END
1546 
1547 EXTERN_C_BEGIN
1548 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1549 EXTERN_C_END
1550 
1551 EXTERN_C_BEGIN
1552 #undef __FUNCT__
1553 #define __FUNCT__ "MatCreate_SeqBAIJ"
1554 int MatCreate_SeqBAIJ(Mat B)
1555 {
1556   int         ierr,size;
1557   Mat_SeqBAIJ *b;
1558 
1559   PetscFunctionBegin;
1560   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1561   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1562 
1563   B->m = B->M = PetscMax(B->m,B->M);
1564   B->n = B->N = PetscMax(B->n,B->N);
1565   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1566   B->data = (void*)b;
1567   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1568   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1569   B->factor           = 0;
1570   B->lupivotthreshold = 1.0;
1571   B->mapping          = 0;
1572   b->row              = 0;
1573   b->col              = 0;
1574   b->icol             = 0;
1575   b->reallocs         = 0;
1576   b->saved_values     = 0;
1577 #if defined(PETSC_USE_MAT_SINGLE)
1578   b->setvalueslen     = 0;
1579   b->setvaluescopy    = PETSC_NULL;
1580 #endif
1581 
1582   ierr = MapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1583   ierr = MapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1584 
1585   b->sorted           = PETSC_FALSE;
1586   b->roworiented      = PETSC_TRUE;
1587   b->nonew            = 0;
1588   b->diag             = 0;
1589   b->solve_work       = 0;
1590   b->mult_work        = 0;
1591   b->spptr            = 0;
1592   B->info.nz_unneeded = (PetscReal)b->maxnz;
1593   b->keepzeroedrows   = PETSC_FALSE;
1594 
1595   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1596                                      "MatStoreValues_SeqBAIJ",
1597                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1598   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1599                                      "MatRetrieveValues_SeqBAIJ",
1600                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1601   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1602                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1603                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1604   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1605                                      "MatConvert_SeqBAIJ_SeqAIJ",
1606                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1607   PetscFunctionReturn(0);
1608 }
1609 EXTERN_C_END
1610 
1611 #undef __FUNCT__
1612 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1613 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1614 {
1615   Mat         C;
1616   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1617   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1618 
1619   PetscFunctionBegin;
1620   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1621 
1622   *B = 0;
1623   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1624   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1625   c    = (Mat_SeqBAIJ*)C->data;
1626 
1627   c->bs         = a->bs;
1628   c->bs2        = a->bs2;
1629   c->mbs        = a->mbs;
1630   c->nbs        = a->nbs;
1631   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1632 
1633   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1634   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1635   for (i=0; i<mbs; i++) {
1636     c->imax[i] = a->imax[i];
1637     c->ilen[i] = a->ilen[i];
1638   }
1639 
1640   /* allocate the matrix space */
1641   c->singlemalloc = PETSC_TRUE;
1642   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1643   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1644   c->j = (int*)(c->a + nz*bs2);
1645   c->i = c->j + nz;
1646   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1647   if (mbs > 0) {
1648     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1649     if (cpvalues == MAT_COPY_VALUES) {
1650       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1651     } else {
1652       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1653     }
1654   }
1655 
1656   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1657   c->sorted      = a->sorted;
1658   c->roworiented = a->roworiented;
1659   c->nonew       = a->nonew;
1660 
1661   if (a->diag) {
1662     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1663     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1664     for (i=0; i<mbs; i++) {
1665       c->diag[i] = a->diag[i];
1666     }
1667   } else c->diag        = 0;
1668   c->nz                 = a->nz;
1669   c->maxnz              = a->maxnz;
1670   c->solve_work         = 0;
1671   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1672   c->mult_work          = 0;
1673   C->preallocated       = PETSC_TRUE;
1674   C->assembled          = PETSC_TRUE;
1675   *B = C;
1676   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 EXTERN_C_BEGIN
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatLoad_SeqBAIJ"
1683 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1684 {
1685   Mat_SeqBAIJ  *a;
1686   Mat          B;
1687   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1688   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1689   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1690   int          *masked,nmask,tmp,bs2,ishift;
1691   Scalar       *aa;
1692   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1693 
1694   PetscFunctionBegin;
1695   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1696   bs2  = bs*bs;
1697 
1698   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1699   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1700   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1701   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1702   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1703   M = header[1]; N = header[2]; nz = header[3];
1704 
1705   if (header[3] < 0) {
1706     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1707   }
1708 
1709   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1710 
1711   /*
1712      This code adds extra rows to make sure the number of rows is
1713     divisible by the blocksize
1714   */
1715   mbs        = M/bs;
1716   extra_rows = bs - M + bs*(mbs);
1717   if (extra_rows == bs) extra_rows = 0;
1718   else                  mbs++;
1719   if (extra_rows) {
1720     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1721   }
1722 
1723   /* read in row lengths */
1724   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1725   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1726   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1727 
1728   /* read in column indices */
1729   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1730   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1731   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1732 
1733   /* loop over row lengths determining block row lengths */
1734   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1735   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1736   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1737   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1738   masked   = mask + mbs;
1739   rowcount = 0; nzcount = 0;
1740   for (i=0; i<mbs; i++) {
1741     nmask = 0;
1742     for (j=0; j<bs; j++) {
1743       kmax = rowlengths[rowcount];
1744       for (k=0; k<kmax; k++) {
1745         tmp = jj[nzcount++]/bs;
1746         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1747       }
1748       rowcount++;
1749     }
1750     browlengths[i] += nmask;
1751     /* zero out the mask elements we set */
1752     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1753   }
1754 
1755   /* create our matrix */
1756   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1757   B = *A;
1758   a = (Mat_SeqBAIJ*)B->data;
1759 
1760   /* set matrix "i" values */
1761   a->i[0] = 0;
1762   for (i=1; i<= mbs; i++) {
1763     a->i[i]      = a->i[i-1] + browlengths[i-1];
1764     a->ilen[i-1] = browlengths[i-1];
1765   }
1766   a->nz         = 0;
1767   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1768 
1769   /* read in nonzero values */
1770   ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr);
1771   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1772   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1773 
1774   /* set "a" and "j" values into matrix */
1775   nzcount = 0; jcount = 0;
1776   for (i=0; i<mbs; i++) {
1777     nzcountb = nzcount;
1778     nmask    = 0;
1779     for (j=0; j<bs; j++) {
1780       kmax = rowlengths[i*bs+j];
1781       for (k=0; k<kmax; k++) {
1782         tmp = jj[nzcount++]/bs;
1783 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1784       }
1785     }
1786     /* sort the masked values */
1787     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1788 
1789     /* set "j" values into matrix */
1790     maskcount = 1;
1791     for (j=0; j<nmask; j++) {
1792       a->j[jcount++]  = masked[j];
1793       mask[masked[j]] = maskcount++;
1794     }
1795     /* set "a" values into matrix */
1796     ishift = bs2*a->i[i];
1797     for (j=0; j<bs; j++) {
1798       kmax = rowlengths[i*bs+j];
1799       for (k=0; k<kmax; k++) {
1800         tmp       = jj[nzcountb]/bs ;
1801         block     = mask[tmp] - 1;
1802         point     = jj[nzcountb] - bs*tmp;
1803         idx       = ishift + bs2*block + j + bs*point;
1804         a->a[idx] = (MatScalar)aa[nzcountb++];
1805       }
1806     }
1807     /* zero out the mask elements we set */
1808     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1809   }
1810   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1811 
1812   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1813   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1814   ierr = PetscFree(aa);CHKERRQ(ierr);
1815   ierr = PetscFree(jj);CHKERRQ(ierr);
1816   ierr = PetscFree(mask);CHKERRQ(ierr);
1817 
1818   B->assembled = PETSC_TRUE;
1819 
1820   ierr = MatView_Private(B);CHKERRQ(ierr);
1821   PetscFunctionReturn(0);
1822 }
1823 EXTERN_C_END
1824 
1825 #undef __FUNCT__
1826 #define __FUNCT__ "MatCreateSeqBAIJ"
1827 /*@C
1828    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1829    compressed row) format.  For good matrix assembly performance the
1830    user should preallocate the matrix storage by setting the parameter nz
1831    (or the array nnz).  By setting these parameters accurately, performance
1832    during matrix assembly can be increased by more than a factor of 50.
1833 
1834    Collective on MPI_Comm
1835 
1836    Input Parameters:
1837 +  comm - MPI communicator, set to PETSC_COMM_SELF
1838 .  bs - size of block
1839 .  m - number of rows
1840 .  n - number of columns
1841 .  nz - number of nonzero blocks  per block row (same for all rows)
1842 -  nnz - array containing the number of nonzero blocks in the various block rows
1843          (possibly different for each block row) or PETSC_NULL
1844 
1845    Output Parameter:
1846 .  A - the matrix
1847 
1848    Options Database Keys:
1849 .   -mat_no_unroll - uses code that does not unroll the loops in the
1850                      block calculations (much slower)
1851 .    -mat_block_size - size of the blocks to use
1852 
1853    Level: intermediate
1854 
1855    Notes:
1856    A nonzero block is any block that as 1 or more nonzeros in it
1857 
1858    The block AIJ format is fully compatible with standard Fortran 77
1859    storage.  That is, the stored row and column indices can begin at
1860    either one (as in Fortran) or zero.  See the users' manual for details.
1861 
1862    Specify the preallocated storage with either nz or nnz (not both).
1863    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1864    allocation.  For additional details, see the users manual chapter on
1865    matrices.
1866 
1867 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1868 @*/
1869 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1870 {
1871   int ierr;
1872 
1873   PetscFunctionBegin;
1874   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1875   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
1876   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 #undef __FUNCT__
1881 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
1882 /*@C
1883    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1884    per row in the matrix. For good matrix assembly performance the
1885    user should preallocate the matrix storage by setting the parameter nz
1886    (or the array nnz).  By setting these parameters accurately, performance
1887    during matrix assembly can be increased by more than a factor of 50.
1888 
1889    Collective on MPI_Comm
1890 
1891    Input Parameters:
1892 +  A - the matrix
1893 .  bs - size of block
1894 .  nz - number of block nonzeros per block row (same for all rows)
1895 -  nnz - array containing the number of block nonzeros in the various block rows
1896          (possibly different for each block row) or PETSC_NULL
1897 
1898    Options Database Keys:
1899 .   -mat_no_unroll - uses code that does not unroll the loops in the
1900                      block calculations (much slower)
1901 .    -mat_block_size - size of the blocks to use
1902 
1903    Level: intermediate
1904 
1905    Notes:
1906    The block AIJ format is fully compatible with standard Fortran 77
1907    storage.  That is, the stored row and column indices can begin at
1908    either one (as in Fortran) or zero.  See the users' manual for details.
1909 
1910    Specify the preallocated storage with either nz or nnz (not both).
1911    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1912    allocation.  For additional details, see the users manual chapter on
1913    matrices.
1914 
1915 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1916 @*/
1917 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1918 {
1919   Mat_SeqBAIJ *b;
1920   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1921   PetscTruth  flg;
1922 
1923   PetscFunctionBegin;
1924   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr);
1925   if (!flg) PetscFunctionReturn(0);
1926 
1927   B->preallocated = PETSC_TRUE;
1928   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1929   if (nnz && newbs != bs) {
1930     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1931   }
1932   bs   = newbs;
1933 
1934   mbs  = B->m/bs;
1935   nbs  = B->n/bs;
1936   bs2  = bs*bs;
1937 
1938   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1939     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1940   }
1941 
1942   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1943   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1944   if (nnz) {
1945     for (i=0; i<mbs; i++) {
1946       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1947       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1948     }
1949   }
1950 
1951   b       = (Mat_SeqBAIJ*)B->data;
1952   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1953   if (!flg) {
1954     switch (bs) {
1955     case 1:
1956       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1957       B->ops->solve           = MatSolve_SeqBAIJ_1;
1958       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_1;
1959       B->ops->mult            = MatMult_SeqBAIJ_1;
1960       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1961       break;
1962     case 2:
1963       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1964       B->ops->solve           = MatSolve_SeqBAIJ_2;
1965       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_2;
1966       B->ops->mult            = MatMult_SeqBAIJ_2;
1967       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1968       break;
1969     case 3:
1970       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1971       B->ops->solve           = MatSolve_SeqBAIJ_3;
1972       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_3;
1973       B->ops->mult            = MatMult_SeqBAIJ_3;
1974       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1975       break;
1976     case 4:
1977       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1978       B->ops->solve           = MatSolve_SeqBAIJ_4;
1979       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_4;
1980       B->ops->mult            = MatMult_SeqBAIJ_4;
1981       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1982       break;
1983     case 5:
1984       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1985       B->ops->solve           = MatSolve_SeqBAIJ_5;
1986       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_5;
1987       B->ops->mult            = MatMult_SeqBAIJ_5;
1988       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1989       break;
1990     case 6:
1991       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1992       B->ops->solve           = MatSolve_SeqBAIJ_6;
1993       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_6;
1994       B->ops->mult            = MatMult_SeqBAIJ_6;
1995       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1996       break;
1997     case 7:
1998       B->ops->mult            = MatMult_SeqBAIJ_7;
1999       B->ops->solve           = MatSolve_SeqBAIJ_7;
2000       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_7;
2001       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2002       break;
2003     default:
2004       B->ops->mult            = MatMult_SeqBAIJ_N;
2005       B->ops->solve           = MatSolve_SeqBAIJ_N;
2006       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2007       break;
2008     }
2009   }
2010   b->bs      = bs;
2011   b->mbs     = mbs;
2012   b->nbs     = nbs;
2013   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
2014   if (!nnz) {
2015     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2016     else if (nz <= 0)        nz = 1;
2017     for (i=0; i<mbs; i++) b->imax[i] = nz;
2018     nz = nz*mbs;
2019   } else {
2020     nz = 0;
2021     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2022   }
2023 
2024   /* allocate the matrix space */
2025   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
2026   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2027   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2028   b->j  = (int*)(b->a + nz*bs2);
2029   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
2030   b->i  = b->j + nz;
2031   b->singlemalloc = PETSC_TRUE;
2032 
2033   b->i[0] = 0;
2034   for (i=1; i<mbs+1; i++) {
2035     b->i[i] = b->i[i-1] + b->imax[i-1];
2036   }
2037 
2038   /* b->ilen will count nonzeros in each block row so far. */
2039   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
2040   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2041   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2042 
2043   b->bs               = bs;
2044   b->bs2              = bs2;
2045   b->mbs              = mbs;
2046   b->nz               = 0;
2047   b->maxnz            = nz*bs2;
2048   B->info.nz_unneeded = (PetscReal)b->maxnz;
2049   PetscFunctionReturn(0);
2050 }
2051 
2052