xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 7bd18339000a996b4d2a36f55f5bcb5e6ccfe1dd)
1 /*$Id: baij.c,v 1.229 2001/06/21 23:47:10 buschelm Exp buschelm $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "petscsys.h"                     /*I "petscmat.h" I*/
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 
12 /*  UGLY, ugly, ugly
13    When MatScalar == Scalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
14    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
15    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
16    converts the entries into single precision and then calls ..._MatScalar() to put them
17    into the single precision data structures.
18 */
19 #if defined(PETSC_USE_MAT_SINGLE)
20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
21 #else
22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
23 #endif
24 
25 #define CHUNKSIZE  10
26 
27 /*
28      Checks for missing diagonals
29 */
30 #undef __FUNCT__
31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
32 int MatMissingDiagonal_SeqBAIJ(Mat A)
33 {
34   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
35   int         *diag,*jj = a->j,i,ierr;
36 
37   PetscFunctionBegin;
38   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
39   diag = a->diag;
40   for (i=0; i<a->mbs; i++) {
41     if (jj[diag[i]] != i) {
42       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
43     }
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
50 int MatMarkDiagonal_SeqBAIJ(Mat A)
51 {
52   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
53   int         i,j,*diag,m = a->mbs,ierr;
54 
55   PetscFunctionBegin;
56   if (a->diag) PetscFunctionReturn(0);
57 
58   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
59   PetscLogObjectMemory(A,(m+1)*sizeof(int));
60   for (i=0; i<m; i++) {
61     diag[i] = a->i[i+1];
62     for (j=a->i[i]; j<a->i[i+1]; j++) {
63       if (a->j[j] == i) {
64         diag[i] = j;
65         break;
66       }
67     }
68   }
69   a->diag = diag;
70   PetscFunctionReturn(0);
71 }
72 
73 
74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
79 {
80   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
81   int         ierr,n = a->mbs,i;
82 
83   PetscFunctionBegin;
84   *nn = n;
85   if (!ia) PetscFunctionReturn(0);
86   if (symmetric) {
87     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
88   } else if (oshift == 1) {
89     /* temporarily add 1 to i and j indices */
90     int nz = a->i[n];
91     for (i=0; i<nz; i++) a->j[i]++;
92     for (i=0; i<n+1; i++) a->i[i]++;
93     *ia = a->i; *ja = a->j;
94   } else {
95     *ia = a->i; *ja = a->j;
96   }
97 
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
104 {
105   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
106   int         i,n = a->mbs,ierr;
107 
108   PetscFunctionBegin;
109   if (!ia) PetscFunctionReturn(0);
110   if (symmetric) {
111     ierr = PetscFree(*ia);CHKERRQ(ierr);
112     ierr = PetscFree(*ja);CHKERRQ(ierr);
113   } else if (oshift == 1) {
114     int nz = a->i[n]-1;
115     for (i=0; i<nz; i++) a->j[i]--;
116     for (i=0; i<n+1; i++) a->i[i]--;
117   }
118   PetscFunctionReturn(0);
119 }
120 
121 #undef __FUNCT__
122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
124 {
125   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
126 
127   PetscFunctionBegin;
128   *bs = baij->bs;
129   PetscFunctionReturn(0);
130 }
131 
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatDestroy_SeqBAIJ"
135 int MatDestroy_SeqBAIJ(Mat A)
136 {
137   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
138   int         ierr;
139 
140   PetscFunctionBegin;
141 #if defined(PETSC_USE_LOG)
142   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
143 #endif
144   ierr = PetscFree(a->a);CHKERRQ(ierr);
145   if (!a->singlemalloc) {
146     ierr = PetscFree(a->i);CHKERRQ(ierr);
147     ierr = PetscFree(a->j);CHKERRQ(ierr);
148   }
149   if (a->row) {
150     ierr = ISDestroy(a->row);CHKERRQ(ierr);
151   }
152   if (a->col) {
153     ierr = ISDestroy(a->col);CHKERRQ(ierr);
154   }
155   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
156   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
157   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
158   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
159   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
160   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
161   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
162 #if defined(PETSC_USE_MAT_SINGLE)
163   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
164 #endif
165   ierr = PetscFree(a);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatSetOption_SeqBAIJ"
171 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
172 {
173   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
174 
175   PetscFunctionBegin;
176   switch (op) {
177   case MAT_ROW_ORIENTED:
178     a->roworiented    = PETSC_TRUE;
179     break;
180   case MAT_COLUMN_ORIENTED:
181     a->roworiented    = PETSC_FALSE;
182     break;
183   case MAT_COLUMNS_SORTED:
184     a->sorted         = PETSC_TRUE;
185     break;
186   case MAT_COLUMNS_UNSORTED:
187     a->sorted         = PETSC_FALSE;
188     break;
189   case MAT_KEEP_ZEROED_ROWS:
190     a->keepzeroedrows = PETSC_TRUE;
191     break;
192   case MAT_NO_NEW_NONZERO_LOCATIONS:
193     a->nonew          = 1;
194     break;
195   case MAT_NEW_NONZERO_LOCATION_ERR:
196     a->nonew          = -1;
197     break;
198   case MAT_NEW_NONZERO_ALLOCATION_ERR:
199     a->nonew          = -2;
200     break;
201   case MAT_YES_NEW_NONZERO_LOCATIONS:
202     a->nonew          = 0;
203     break;
204   case MAT_ROWS_SORTED:
205   case MAT_ROWS_UNSORTED:
206   case MAT_YES_NEW_DIAGONALS:
207   case MAT_IGNORE_OFF_PROC_ENTRIES:
208   case MAT_USE_HASH_TABLE:
209     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
210     break;
211   case MAT_NO_NEW_DIAGONALS:
212     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
213     break;
214   case MAT_USE_SINGLE_PRECISION_SOLVES:
215 #if defined(PETSC_USE_MAT_SINGLE)
216     if (a->bs==4) {
217       PetscTruth sse_enabled;
218       int ierr;
219       ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr);
220       if (sse_enabled) {
221         singleprecisionsolves = PETSC_TRUE;
222       } else {
223         PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
224       }
225     } else {
226       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
227     }
228 #else
229     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
230 #endif
231     break;
232   default:
233     SETERRQ(PETSC_ERR_SUP,"unknown option");
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ"
240 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
241 {
242   PetscFunctionBegin;
243   if (m) *m = 0;
244   if (n) *n = A->m;
245   PetscFunctionReturn(0);
246 }
247 
248 #undef __FUNCT__
249 #define __FUNCT__ "MatGetRow_SeqBAIJ"
250 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
251 {
252   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
253   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
254   MatScalar    *aa,*aa_i;
255   Scalar       *v_i;
256 
257   PetscFunctionBegin;
258   bs  = a->bs;
259   ai  = a->i;
260   aj  = a->j;
261   aa  = a->a;
262   bs2 = a->bs2;
263 
264   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
265 
266   bn  = row/bs;   /* Block number */
267   bp  = row % bs; /* Block Position */
268   M   = ai[bn+1] - ai[bn];
269   *nz = bs*M;
270 
271   if (v) {
272     *v = 0;
273     if (*nz) {
274       ierr = PetscMalloc((*nz)*sizeof(Scalar),v);CHKERRQ(ierr);
275       for (i=0; i<M; i++) { /* for each block in the block row */
276         v_i  = *v + i*bs;
277         aa_i = aa + bs2*(ai[bn] + i);
278         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
279       }
280     }
281   }
282 
283   if (idx) {
284     *idx = 0;
285     if (*nz) {
286       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
287       for (i=0; i<M; i++) { /* for each block in the block row */
288         idx_i = *idx + i*bs;
289         itmp  = bs*aj[ai[bn] + i];
290         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
291       }
292     }
293   }
294   PetscFunctionReturn(0);
295 }
296 
297 #undef __FUNCT__
298 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
299 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
300 {
301   int ierr;
302 
303   PetscFunctionBegin;
304   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
305   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatTranspose_SeqBAIJ"
311 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
312 {
313   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
314   Mat         C;
315   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
316   int         *rows,*cols,bs2=a->bs2;
317   Scalar      *array;
318 
319   PetscFunctionBegin;
320   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
321   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
322   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
323 
324 #if defined(PETSC_USE_MAT_SINGLE)
325   ierr = PetscMalloc(a->bs2*a->nz*sizeof(Scalar),&array);CHKERRQ(ierr);
326   for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i];
327 #else
328   array = a->a;
329 #endif
330 
331   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
332   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
333   ierr = PetscFree(col);CHKERRQ(ierr);
334   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
335   cols = rows + bs;
336   for (i=0; i<mbs; i++) {
337     cols[0] = i*bs;
338     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
339     len = ai[i+1] - ai[i];
340     for (j=0; j<len; j++) {
341       rows[0] = (*aj++)*bs;
342       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
343       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
344       array += bs2;
345     }
346   }
347   ierr = PetscFree(rows);CHKERRQ(ierr);
348 #if defined(PETSC_USE_MAT_SINGLE)
349   ierr = PetscFree(array);
350 #endif
351 
352   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
353   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
354 
355   if (B) {
356     *B = C;
357   } else {
358     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
359   }
360   PetscFunctionReturn(0);
361 }
362 
363 #undef __FUNCT__
364 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
365 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
366 {
367   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
368   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
369   Scalar      *aa;
370   FILE        *file;
371 
372   PetscFunctionBegin;
373   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
374   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
375   col_lens[0] = MAT_COOKIE;
376 
377   col_lens[1] = A->m;
378   col_lens[2] = A->n;
379   col_lens[3] = a->nz*bs2;
380 
381   /* store lengths of each row and write (including header) to file */
382   count = 0;
383   for (i=0; i<a->mbs; i++) {
384     for (j=0; j<bs; j++) {
385       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
386     }
387   }
388   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
389   ierr = PetscFree(col_lens);CHKERRQ(ierr);
390 
391   /* store column indices (zero start index) */
392   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
393   count = 0;
394   for (i=0; i<a->mbs; i++) {
395     for (j=0; j<bs; j++) {
396       for (k=a->i[i]; k<a->i[i+1]; k++) {
397         for (l=0; l<bs; l++) {
398           jj[count++] = bs*a->j[k] + l;
399         }
400       }
401     }
402   }
403   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
404   ierr = PetscFree(jj);CHKERRQ(ierr);
405 
406   /* store nonzero values */
407   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr);
408   count = 0;
409   for (i=0; i<a->mbs; i++) {
410     for (j=0; j<bs; j++) {
411       for (k=a->i[i]; k<a->i[i+1]; k++) {
412         for (l=0; l<bs; l++) {
413           aa[count++] = a->a[bs2*k + l*bs + j];
414         }
415       }
416     }
417   }
418   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
419   ierr = PetscFree(aa);CHKERRQ(ierr);
420 
421   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
422   if (file) {
423     fprintf(file,"-matload_block_size %d\n",a->bs);
424   }
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
430 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
431 {
432   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
433   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
434   PetscViewerFormat format;
435 
436   PetscFunctionBegin;
437   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
438   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
439     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
440   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
441     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
442   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
443     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
444     for (i=0; i<a->mbs; i++) {
445       for (j=0; j<bs; j++) {
446         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
447         for (k=a->i[i]; k<a->i[i+1]; k++) {
448           for (l=0; l<bs; l++) {
449 #if defined(PETSC_USE_COMPLEX)
450             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
451               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l,
452                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
453             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
454               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l,
455                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
456             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
457               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
458             }
459 #else
460             if (a->a[bs2*k + l*bs + j] != 0.0) {
461               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
462             }
463 #endif
464           }
465         }
466         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
467       }
468     }
469     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
470   } else {
471     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
472     for (i=0; i<a->mbs; i++) {
473       for (j=0; j<bs; j++) {
474         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
475         for (k=a->i[i]; k<a->i[i+1]; k++) {
476           for (l=0; l<bs; l++) {
477 #if defined(PETSC_USE_COMPLEX)
478             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
479               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
480                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
481             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
482               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
483                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
484             } else {
485               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
486             }
487 #else
488             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
489 #endif
490           }
491         }
492         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
493       }
494     }
495     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
496   }
497   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
498   PetscFunctionReturn(0);
499 }
500 
501 #undef __FUNCT__
502 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
503 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
504 {
505   Mat          A = (Mat) Aa;
506   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
507   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
508   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
509   MatScalar    *aa;
510   PetscViewer  viewer;
511 
512   PetscFunctionBegin;
513 
514   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
515   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
516 
517   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
518 
519   /* loop over matrix elements drawing boxes */
520   color = PETSC_DRAW_BLUE;
521   for (i=0,row=0; i<mbs; i++,row+=bs) {
522     for (j=a->i[i]; j<a->i[i+1]; j++) {
523       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
524       x_l = a->j[j]*bs; x_r = x_l + 1.0;
525       aa = a->a + j*bs2;
526       for (k=0; k<bs; k++) {
527         for (l=0; l<bs; l++) {
528           if (PetscRealPart(*aa++) >=  0.) continue;
529           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
530         }
531       }
532     }
533   }
534   color = PETSC_DRAW_CYAN;
535   for (i=0,row=0; i<mbs; i++,row+=bs) {
536     for (j=a->i[i]; j<a->i[i+1]; j++) {
537       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
538       x_l = a->j[j]*bs; x_r = x_l + 1.0;
539       aa = a->a + j*bs2;
540       for (k=0; k<bs; k++) {
541         for (l=0; l<bs; l++) {
542           if (PetscRealPart(*aa++) != 0.) continue;
543           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
544         }
545       }
546     }
547   }
548 
549   color = PETSC_DRAW_RED;
550   for (i=0,row=0; i<mbs; i++,row+=bs) {
551     for (j=a->i[i]; j<a->i[i+1]; j++) {
552       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
553       x_l = a->j[j]*bs; x_r = x_l + 1.0;
554       aa = a->a + j*bs2;
555       for (k=0; k<bs; k++) {
556         for (l=0; l<bs; l++) {
557           if (PetscRealPart(*aa++) <= 0.) continue;
558           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
559         }
560       }
561     }
562   }
563   PetscFunctionReturn(0);
564 }
565 
566 #undef __FUNCT__
567 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
568 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
569 {
570   int          ierr;
571   PetscReal    xl,yl,xr,yr,w,h;
572   PetscDraw    draw;
573   PetscTruth   isnull;
574 
575   PetscFunctionBegin;
576 
577   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
578   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
579 
580   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
581   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
582   xr += w;    yr += h;  xl = -w;     yl = -h;
583   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
584   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
585   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatView_SeqBAIJ"
591 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
592 {
593   int        ierr;
594   PetscTruth issocket,isascii,isbinary,isdraw;
595 
596   PetscFunctionBegin;
597   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
598   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
599   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
600   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
601   if (issocket) {
602     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
603   } else if (isascii){
604     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
605   } else if (isbinary) {
606     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
607   } else if (isdraw) {
608     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
609   } else {
610     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
611   }
612   PetscFunctionReturn(0);
613 }
614 
615 
616 #undef __FUNCT__
617 #define __FUNCT__ "MatGetValues_SeqBAIJ"
618 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
619 {
620   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
621   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
622   int        *ai = a->i,*ailen = a->ilen;
623   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
624   MatScalar  *ap,*aa = a->a,zero = 0.0;
625 
626   PetscFunctionBegin;
627   for (k=0; k<m; k++) { /* loop over rows */
628     row  = im[k]; brow = row/bs;
629     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
630     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
631     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
632     nrow = ailen[brow];
633     for (l=0; l<n; l++) { /* loop over columns */
634       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
635       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
636       col  = in[l] ;
637       bcol = col/bs;
638       cidx = col%bs;
639       ridx = row%bs;
640       high = nrow;
641       low  = 0; /* assume unsorted */
642       while (high-low > 5) {
643         t = (low+high)/2;
644         if (rp[t] > bcol) high = t;
645         else             low  = t;
646       }
647       for (i=low; i<high; i++) {
648         if (rp[i] > bcol) break;
649         if (rp[i] == bcol) {
650           *v++ = ap[bs2*i+bs*cidx+ridx];
651           goto finished;
652         }
653       }
654       *v++ = zero;
655       finished:;
656     }
657   }
658   PetscFunctionReturn(0);
659 }
660 
661 #if defined(PETSC_USE_MAT_SINGLE)
662 #undef __FUNCT__
663 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
664 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
665 {
666   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
667   int         ierr,i,N = m*n*b->bs2;
668   MatScalar   *vsingle;
669 
670   PetscFunctionBegin;
671   if (N > b->setvalueslen) {
672     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
673     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
674     b->setvalueslen  = N;
675   }
676   vsingle = b->setvaluescopy;
677   for (i=0; i<N; i++) {
678     vsingle[i] = v[i];
679   }
680   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
681   PetscFunctionReturn(0);
682 }
683 #endif
684 
685 
686 #undef __FUNCT__
687 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
688 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
689 {
690   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
691   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
692   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
693   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
694   PetscTruth  roworiented=a->roworiented;
695   MatScalar   *value = v,*ap,*aa = a->a,*bap;
696 
697   PetscFunctionBegin;
698   if (roworiented) {
699     stepval = (n-1)*bs;
700   } else {
701     stepval = (m-1)*bs;
702   }
703   for (k=0; k<m; k++) { /* loop over added rows */
704     row  = im[k];
705     if (row < 0) continue;
706 #if defined(PETSC_USE_BOPT_g)
707     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
708 #endif
709     rp   = aj + ai[row];
710     ap   = aa + bs2*ai[row];
711     rmax = imax[row];
712     nrow = ailen[row];
713     low  = 0;
714     for (l=0; l<n; l++) { /* loop over added columns */
715       if (in[l] < 0) continue;
716 #if defined(PETSC_USE_BOPT_g)
717       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
718 #endif
719       col = in[l];
720       if (roworiented) {
721         value = v + k*(stepval+bs)*bs + l*bs;
722       } else {
723         value = v + l*(stepval+bs)*bs + k*bs;
724       }
725       if (!sorted) low = 0; high = nrow;
726       while (high-low > 7) {
727         t = (low+high)/2;
728         if (rp[t] > col) high = t;
729         else             low  = t;
730       }
731       for (i=low; i<high; i++) {
732         if (rp[i] > col) break;
733         if (rp[i] == col) {
734           bap  = ap +  bs2*i;
735           if (roworiented) {
736             if (is == ADD_VALUES) {
737               for (ii=0; ii<bs; ii++,value+=stepval) {
738                 for (jj=ii; jj<bs2; jj+=bs) {
739                   bap[jj] += *value++;
740                 }
741               }
742             } else {
743               for (ii=0; ii<bs; ii++,value+=stepval) {
744                 for (jj=ii; jj<bs2; jj+=bs) {
745                   bap[jj] = *value++;
746                 }
747               }
748             }
749           } else {
750             if (is == ADD_VALUES) {
751               for (ii=0; ii<bs; ii++,value+=stepval) {
752                 for (jj=0; jj<bs; jj++) {
753                   *bap++ += *value++;
754                 }
755               }
756             } else {
757               for (ii=0; ii<bs; ii++,value+=stepval) {
758                 for (jj=0; jj<bs; jj++) {
759                   *bap++  = *value++;
760                 }
761               }
762             }
763           }
764           goto noinsert2;
765         }
766       }
767       if (nonew == 1) goto noinsert2;
768       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
769       if (nrow >= rmax) {
770         /* there is no extra room in row, therefore enlarge */
771         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
772         MatScalar *new_a;
773 
774         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
775 
776         /* malloc new storage space */
777         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
778 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
779         new_j   = (int*)(new_a + bs2*new_nz);
780         new_i   = new_j + new_nz;
781 
782         /* copy over old data into new slots */
783         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
784         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
785         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
786         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
787         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
788         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
789         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
790         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
791         /* free up old matrix storage */
792         ierr = PetscFree(a->a);CHKERRQ(ierr);
793         if (!a->singlemalloc) {
794           ierr = PetscFree(a->i);CHKERRQ(ierr);
795           ierr = PetscFree(a->j);CHKERRQ(ierr);
796         }
797         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
798         a->singlemalloc = PETSC_TRUE;
799 
800         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
801         rmax = imax[row] = imax[row] + CHUNKSIZE;
802         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
803         a->maxnz += bs2*CHUNKSIZE;
804         a->reallocs++;
805         a->nz++;
806       }
807       N = nrow++ - 1;
808       /* shift up all the later entries in this row */
809       for (ii=N; ii>=i; ii--) {
810         rp[ii+1] = rp[ii];
811         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
812       }
813       if (N >= i) {
814         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
815       }
816       rp[i] = col;
817       bap   = ap +  bs2*i;
818       if (roworiented) {
819         for (ii=0; ii<bs; ii++,value+=stepval) {
820           for (jj=ii; jj<bs2; jj+=bs) {
821             bap[jj] = *value++;
822           }
823         }
824       } else {
825         for (ii=0; ii<bs; ii++,value+=stepval) {
826           for (jj=0; jj<bs; jj++) {
827             *bap++  = *value++;
828           }
829         }
830       }
831       noinsert2:;
832       low = i;
833     }
834     ailen[row] = nrow;
835   }
836   PetscFunctionReturn(0);
837 }
838 
839 
840 #undef __FUNCT__
841 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
842 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
843 {
844   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
845   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
846   int        m = A->m,*ip,N,*ailen = a->ilen;
847   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
848   MatScalar  *aa = a->a,*ap;
849 
850   PetscFunctionBegin;
851   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
852 
853   if (m) rmax = ailen[0];
854   for (i=1; i<mbs; i++) {
855     /* move each row back by the amount of empty slots (fshift) before it*/
856     fshift += imax[i-1] - ailen[i-1];
857     rmax   = PetscMax(rmax,ailen[i]);
858     if (fshift) {
859       ip = aj + ai[i]; ap = aa + bs2*ai[i];
860       N = ailen[i];
861       for (j=0; j<N; j++) {
862         ip[j-fshift] = ip[j];
863         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
864       }
865     }
866     ai[i] = ai[i-1] + ailen[i-1];
867   }
868   if (mbs) {
869     fshift += imax[mbs-1] - ailen[mbs-1];
870     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
871   }
872   /* reset ilen and imax for each row */
873   for (i=0; i<mbs; i++) {
874     ailen[i] = imax[i] = ai[i+1] - ai[i];
875   }
876   a->nz = ai[mbs];
877 
878   /* diagonals may have moved, so kill the diagonal pointers */
879   if (fshift && a->diag) {
880     ierr = PetscFree(a->diag);CHKERRQ(ierr);
881     PetscLogObjectMemory(A,-(m+1)*sizeof(int));
882     a->diag = 0;
883   }
884   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
885            m,A->n,a->bs,fshift*bs2,a->nz*bs2);
886   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
887            a->reallocs);
888   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
889   a->reallocs          = 0;
890   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
891 
892   PetscFunctionReturn(0);
893 }
894 
895 
896 
897 /*
898    This function returns an array of flags which indicate the locations of contiguous
899    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
900    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
901    Assume: sizes should be long enough to hold all the values.
902 */
903 #undef __FUNCT__
904 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
905 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
906 {
907   int        i,j,k,row;
908   PetscTruth flg;
909 
910   PetscFunctionBegin;
911   for (i=0,j=0; i<n; j++) {
912     row = idx[i];
913     if (row%bs!=0) { /* Not the begining of a block */
914       sizes[j] = 1;
915       i++;
916     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
917       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
918       i++;
919     } else { /* Begining of the block, so check if the complete block exists */
920       flg = PETSC_TRUE;
921       for (k=1; k<bs; k++) {
922         if (row+k != idx[i+k]) { /* break in the block */
923           flg = PETSC_FALSE;
924           break;
925         }
926       }
927       if (flg == PETSC_TRUE) { /* No break in the bs */
928         sizes[j] = bs;
929         i+= bs;
930       } else {
931         sizes[j] = 1;
932         i++;
933       }
934     }
935   }
936   *bs_max = j;
937   PetscFunctionReturn(0);
938 }
939 
940 #undef __FUNCT__
941 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
942 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag)
943 {
944   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
945   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
946   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
947   Scalar      zero = 0.0;
948   MatScalar   *aa;
949 
950   PetscFunctionBegin;
951   /* Make a copy of the IS and  sort it */
952   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
953   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
954 
955   /* allocate memory for rows,sizes */
956   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
957   sizes = rows + is_n;
958 
959   /* copy IS values to rows, and sort them */
960   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
961   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
962   if (baij->keepzeroedrows) {
963     for (i=0; i<is_n; i++) { sizes[i] = 1; }
964     bs_max = is_n;
965   } else {
966     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
967   }
968   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
969 
970   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
971     row   = rows[j];
972     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
973     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
974     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
975     if (sizes[i] == bs && !baij->keepzeroedrows) {
976       if (diag) {
977         if (baij->ilen[row/bs] > 0) {
978           baij->ilen[row/bs]       = 1;
979           baij->j[baij->i[row/bs]] = row/bs;
980           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
981         }
982         /* Now insert all the diagonal values for this bs */
983         for (k=0; k<bs; k++) {
984           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
985         }
986       } else { /* (!diag) */
987         baij->ilen[row/bs] = 0;
988       } /* end (!diag) */
989     } else { /* (sizes[i] != bs) */
990 #if defined (PETSC_USE_DEBUG)
991       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
992 #endif
993       for (k=0; k<count; k++) {
994         aa[0] =  zero;
995         aa    += bs;
996       }
997       if (diag) {
998         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
999       }
1000     }
1001   }
1002 
1003   ierr = PetscFree(rows);CHKERRQ(ierr);
1004   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 #undef __FUNCT__
1009 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1010 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
1011 {
1012   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1013   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1014   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1015   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1016   int         ridx,cidx,bs2=a->bs2,ierr;
1017   PetscTruth  roworiented=a->roworiented;
1018   MatScalar   *ap,value,*aa=a->a,*bap;
1019 
1020   PetscFunctionBegin;
1021   for (k=0; k<m; k++) { /* loop over added rows */
1022     row  = im[k]; brow = row/bs;
1023     if (row < 0) continue;
1024 #if defined(PETSC_USE_BOPT_g)
1025     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1026 #endif
1027     rp   = aj + ai[brow];
1028     ap   = aa + bs2*ai[brow];
1029     rmax = imax[brow];
1030     nrow = ailen[brow];
1031     low  = 0;
1032     for (l=0; l<n; l++) { /* loop over added columns */
1033       if (in[l] < 0) continue;
1034 #if defined(PETSC_USE_BOPT_g)
1035       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1036 #endif
1037       col = in[l]; bcol = col/bs;
1038       ridx = row % bs; cidx = col % bs;
1039       if (roworiented) {
1040         value = v[l + k*n];
1041       } else {
1042         value = v[k + l*m];
1043       }
1044       if (!sorted) low = 0; high = nrow;
1045       while (high-low > 7) {
1046         t = (low+high)/2;
1047         if (rp[t] > bcol) high = t;
1048         else              low  = t;
1049       }
1050       for (i=low; i<high; i++) {
1051         if (rp[i] > bcol) break;
1052         if (rp[i] == bcol) {
1053           bap  = ap +  bs2*i + bs*cidx + ridx;
1054           if (is == ADD_VALUES) *bap += value;
1055           else                  *bap  = value;
1056           goto noinsert1;
1057         }
1058       }
1059       if (nonew == 1) goto noinsert1;
1060       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1061       if (nrow >= rmax) {
1062         /* there is no extra room in row, therefore enlarge */
1063         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1064         MatScalar *new_a;
1065 
1066         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1067 
1068         /* Malloc new storage space */
1069         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1070 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1071         new_j   = (int*)(new_a + bs2*new_nz);
1072         new_i   = new_j + new_nz;
1073 
1074         /* copy over old data into new slots */
1075         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1076         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1077         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1078         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1079         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1080         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1081         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1082         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1083         /* free up old matrix storage */
1084         ierr = PetscFree(a->a);CHKERRQ(ierr);
1085         if (!a->singlemalloc) {
1086           ierr = PetscFree(a->i);CHKERRQ(ierr);
1087           ierr = PetscFree(a->j);CHKERRQ(ierr);
1088         }
1089         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1090         a->singlemalloc = PETSC_TRUE;
1091 
1092         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1093         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1094         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1095         a->maxnz += bs2*CHUNKSIZE;
1096         a->reallocs++;
1097         a->nz++;
1098       }
1099       N = nrow++ - 1;
1100       /* shift up all the later entries in this row */
1101       for (ii=N; ii>=i; ii--) {
1102         rp[ii+1] = rp[ii];
1103         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1104       }
1105       if (N>=i) {
1106         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1107       }
1108       rp[i]                      = bcol;
1109       ap[bs2*i + bs*cidx + ridx] = value;
1110       noinsert1:;
1111       low = i;
1112     }
1113     ailen[brow] = nrow;
1114   }
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 EXTERN int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,MatLUInfo*,Mat*);
1119 EXTERN int MatLUFactor_SeqBAIJ(Mat,IS,IS,MatLUInfo*);
1120 EXTERN int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1121 EXTERN int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1122 EXTERN int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1123 EXTERN int MatMultTranspose_SeqBAIJ(Mat,Vec,Vec);
1124 EXTERN int MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1125 EXTERN int MatScale_SeqBAIJ(Scalar*,Mat);
1126 EXTERN int MatNorm_SeqBAIJ(Mat,NormType,PetscReal *);
1127 EXTERN int MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*);
1128 EXTERN int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1129 EXTERN int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1130 EXTERN int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1131 EXTERN int MatZeroEntries_SeqBAIJ(Mat);
1132 
1133 EXTERN int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1134 EXTERN int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1135 EXTERN int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1136 EXTERN int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1137 EXTERN int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1138 EXTERN int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1139 EXTERN int MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
1140 EXTERN int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1141 EXTERN int MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec);
1142 EXTERN int MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec);
1143 EXTERN int MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec);
1144 EXTERN int MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec);
1145 EXTERN int MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec);
1146 EXTERN int MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec);
1147 EXTERN int MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec);
1148 
1149 EXTERN int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1150 EXTERN int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1151 EXTERN int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1152 EXTERN int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1153 EXTERN int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1154 EXTERN int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1155 EXTERN int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*);
1156 
1157 EXTERN int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1158 EXTERN int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1159 EXTERN int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1160 EXTERN int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1161 EXTERN int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1162 EXTERN int MatMult_SeqBAIJ_6(Mat,Vec,Vec);
1163 EXTERN int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1164 EXTERN int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1165 
1166 EXTERN int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1167 EXTERN int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1168 EXTERN int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1169 EXTERN int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1170 EXTERN int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1171 EXTERN int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
1172 EXTERN int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1173 EXTERN int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1174 
1175 #undef __FUNCT__
1176 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1177 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1178 {
1179   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1180   Mat         outA;
1181   int         ierr;
1182   PetscTruth  row_identity,col_identity;
1183 
1184   PetscFunctionBegin;
1185   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1186   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1187   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1188   if (!row_identity || !col_identity) {
1189     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1190   }
1191 
1192   outA          = inA;
1193   inA->factor   = FACTOR_LU;
1194 
1195   if (!a->diag) {
1196     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1197   }
1198   /*
1199       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1200       for ILU(0) factorization with natural ordering
1201   */
1202   switch (a->bs) {
1203   case 1:
1204     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1205     inA->ops->solve           = MatSolve_SeqBAIJ_1_NaturalOrdering;
1206     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering;
1207     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
1208   case 2:
1209     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1210     inA->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1211     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering;
1212     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1213     break;
1214   case 3:
1215     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1216     inA->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1217     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_3_NaturalOrdering;
1218     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1219     break;
1220   case 4:
1221 #if defined(PETSC_HAVE_SSE)
1222     PetscTruth sse_enabled;
1223     ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr);
1224     if (sse_enabled) {
1225       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
1226     } else {
1227       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1228     }
1229 #else
1230     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1231 #endif
1232     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1233     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_4_NaturalOrdering;
1234     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1235     break;
1236   case 5:
1237     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1238     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1239     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering;
1240     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1241     break;
1242   case 6:
1243     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1244     inA->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1245     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_6_NaturalOrdering;
1246     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1247     break;
1248   case 7:
1249     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1250     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering;
1251     inA->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1252     PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1253     break;
1254   default:
1255     a->row        = row;
1256     a->col        = col;
1257     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1258     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1259 
1260     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1261     ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1262     PetscLogObjectParent(inA,a->icol);
1263 
1264     if (!a->solve_work) {
1265       ierr = PetscMalloc((inA->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr);
1266       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(Scalar));
1267     }
1268   }
1269 
1270   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1271 
1272   PetscFunctionReturn(0);
1273 }
1274 #undef __FUNCT__
1275 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1276 int MatPrintHelp_SeqBAIJ(Mat A)
1277 {
1278   static PetscTruth called = PETSC_FALSE;
1279   MPI_Comm          comm = A->comm;
1280   int               ierr;
1281 
1282   PetscFunctionBegin;
1283   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1284   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1285   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 EXTERN_C_BEGIN
1290 #undef __FUNCT__
1291 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1292 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1293 {
1294   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1295   int         i,nz,n;
1296 
1297   PetscFunctionBegin;
1298   nz = baij->maxnz;
1299   n  = mat->n;
1300   for (i=0; i<nz; i++) {
1301     baij->j[i] = indices[i];
1302   }
1303   baij->nz = nz;
1304   for (i=0; i<n; i++) {
1305     baij->ilen[i] = baij->imax[i];
1306   }
1307 
1308   PetscFunctionReturn(0);
1309 }
1310 EXTERN_C_END
1311 
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1314 /*@
1315     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1316        in the matrix.
1317 
1318   Input Parameters:
1319 +  mat - the SeqBAIJ matrix
1320 -  indices - the column indices
1321 
1322   Level: advanced
1323 
1324   Notes:
1325     This can be called if you have precomputed the nonzero structure of the
1326   matrix and want to provide it to the matrix object to improve the performance
1327   of the MatSetValues() operation.
1328 
1329     You MUST have set the correct numbers of nonzeros per row in the call to
1330   MatCreateSeqBAIJ().
1331 
1332     MUST be called before any calls to MatSetValues();
1333 
1334 @*/
1335 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1336 {
1337   int ierr,(*f)(Mat,int *);
1338 
1339   PetscFunctionBegin;
1340   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1341   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr);
1342   if (f) {
1343     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1344   } else {
1345     SETERRQ(1,"Wrong type of matrix to set column indices");
1346   }
1347   PetscFunctionReturn(0);
1348 }
1349 
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1352 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1353 {
1354   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1355   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1356   PetscReal    atmp;
1357   Scalar       *x,zero = 0.0;
1358   MatScalar    *aa;
1359   int          ncols,brow,krow,kcol;
1360 
1361   PetscFunctionBegin;
1362   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1363   bs   = a->bs;
1364   aa   = a->a;
1365   ai   = a->i;
1366   aj   = a->j;
1367   mbs = a->mbs;
1368 
1369   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1370   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1371   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1372   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1373   for (i=0; i<mbs; i++) {
1374     ncols = ai[1] - ai[0]; ai++;
1375     brow  = bs*i;
1376     for (j=0; j<ncols; j++){
1377       /* bcol = bs*(*aj); */
1378       for (kcol=0; kcol<bs; kcol++){
1379         for (krow=0; krow<bs; krow++){
1380           atmp = PetscAbsScalar(*aa); aa++;
1381           row = brow + krow;    /* row index */
1382           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1383           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1384         }
1385       }
1386       aj++;
1387     }
1388   }
1389   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 #undef __FUNCT__
1394 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1395 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1396 {
1397   int        ierr;
1398 
1399   PetscFunctionBegin;
1400   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1406 int MatGetArray_SeqBAIJ(Mat A,Scalar **array)
1407 {
1408   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1409   PetscFunctionBegin;
1410   *array = a->a;
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1416 int MatRestoreArray_SeqBAIJ(Mat A,Scalar **array)
1417 {
1418   PetscFunctionBegin;
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 /* -------------------------------------------------------------------*/
1423 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1424        MatGetRow_SeqBAIJ,
1425        MatRestoreRow_SeqBAIJ,
1426        MatMult_SeqBAIJ_N,
1427        MatMultAdd_SeqBAIJ_N,
1428        MatMultTranspose_SeqBAIJ,
1429        MatMultTransposeAdd_SeqBAIJ,
1430        MatSolve_SeqBAIJ_N,
1431        0,
1432        0,
1433        0,
1434        MatLUFactor_SeqBAIJ,
1435        0,
1436        0,
1437        MatTranspose_SeqBAIJ,
1438        MatGetInfo_SeqBAIJ,
1439        MatEqual_SeqBAIJ,
1440        MatGetDiagonal_SeqBAIJ,
1441        MatDiagonalScale_SeqBAIJ,
1442        MatNorm_SeqBAIJ,
1443        0,
1444        MatAssemblyEnd_SeqBAIJ,
1445        0,
1446        MatSetOption_SeqBAIJ,
1447        MatZeroEntries_SeqBAIJ,
1448        MatZeroRows_SeqBAIJ,
1449        MatLUFactorSymbolic_SeqBAIJ,
1450        MatLUFactorNumeric_SeqBAIJ_N,
1451        0,
1452        0,
1453        MatSetUpPreallocation_SeqBAIJ,
1454        0,
1455        MatGetOwnershipRange_SeqBAIJ,
1456        MatILUFactorSymbolic_SeqBAIJ,
1457        0,
1458        MatGetArray_SeqBAIJ,
1459        MatRestoreArray_SeqBAIJ,
1460        MatDuplicate_SeqBAIJ,
1461        0,
1462        0,
1463        MatILUFactor_SeqBAIJ,
1464        0,
1465        0,
1466        MatGetSubMatrices_SeqBAIJ,
1467        MatIncreaseOverlap_SeqBAIJ,
1468        MatGetValues_SeqBAIJ,
1469        0,
1470        MatPrintHelp_SeqBAIJ,
1471        MatScale_SeqBAIJ,
1472        0,
1473        0,
1474        0,
1475        MatGetBlockSize_SeqBAIJ,
1476        MatGetRowIJ_SeqBAIJ,
1477        MatRestoreRowIJ_SeqBAIJ,
1478        0,
1479        0,
1480        0,
1481        0,
1482        0,
1483        0,
1484        MatSetValuesBlocked_SeqBAIJ,
1485        MatGetSubMatrix_SeqBAIJ,
1486        MatDestroy_SeqBAIJ,
1487        MatView_SeqBAIJ,
1488        MatGetMaps_Petsc,
1489        0,
1490        0,
1491        0,
1492        0,
1493        0,
1494        0,
1495        MatGetRowMax_SeqBAIJ,
1496        MatConvert_Basic};
1497 
1498 EXTERN_C_BEGIN
1499 #undef __FUNCT__
1500 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1501 int MatStoreValues_SeqBAIJ(Mat mat)
1502 {
1503   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1504   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1505   int         ierr;
1506 
1507   PetscFunctionBegin;
1508   if (aij->nonew != 1) {
1509     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1510   }
1511 
1512   /* allocate space for values if not already there */
1513   if (!aij->saved_values) {
1514     ierr = PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr);
1515   }
1516 
1517   /* copy values over */
1518   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1519   PetscFunctionReturn(0);
1520 }
1521 EXTERN_C_END
1522 
1523 EXTERN_C_BEGIN
1524 #undef __FUNCT__
1525 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1526 int MatRetrieveValues_SeqBAIJ(Mat mat)
1527 {
1528   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1529   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1530 
1531   PetscFunctionBegin;
1532   if (aij->nonew != 1) {
1533     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1534   }
1535   if (!aij->saved_values) {
1536     SETERRQ(1,"Must call MatStoreValues(A);first");
1537   }
1538 
1539   /* copy values over */
1540   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1541   PetscFunctionReturn(0);
1542 }
1543 EXTERN_C_END
1544 
1545 EXTERN_C_BEGIN
1546 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1547 EXTERN_C_END
1548 
1549 EXTERN_C_BEGIN
1550 #undef __FUNCT__
1551 #define __FUNCT__ "MatCreate_SeqBAIJ"
1552 int MatCreate_SeqBAIJ(Mat B)
1553 {
1554   int         ierr,size;
1555   Mat_SeqBAIJ *b;
1556 
1557   PetscFunctionBegin;
1558   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1559   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1560 
1561   B->m = B->M = PetscMax(B->m,B->M);
1562   B->n = B->N = PetscMax(B->n,B->N);
1563   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1564   B->data = (void*)b;
1565   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1566   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1567   B->factor           = 0;
1568   B->lupivotthreshold = 1.0;
1569   B->mapping          = 0;
1570   b->row              = 0;
1571   b->col              = 0;
1572   b->icol             = 0;
1573   b->reallocs         = 0;
1574   b->saved_values     = 0;
1575 #if defined(PETSC_USE_MAT_SINGLE)
1576   b->setvalueslen     = 0;
1577   b->setvaluescopy    = PETSC_NULL;
1578 #endif
1579 
1580   ierr = MapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1581   ierr = MapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1582 
1583   b->sorted           = PETSC_FALSE;
1584   b->roworiented      = PETSC_TRUE;
1585   b->nonew            = 0;
1586   b->diag             = 0;
1587   b->solve_work       = 0;
1588   b->mult_work        = 0;
1589   b->spptr            = 0;
1590   B->info.nz_unneeded = (PetscReal)b->maxnz;
1591   b->keepzeroedrows   = PETSC_FALSE;
1592 
1593   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1594                                      "MatStoreValues_SeqBAIJ",
1595                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1596   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1597                                      "MatRetrieveValues_SeqBAIJ",
1598                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1599   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1600                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1601                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1602   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1603                                      "MatConvert_SeqBAIJ_SeqAIJ",
1604                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1605   PetscFunctionReturn(0);
1606 }
1607 EXTERN_C_END
1608 
1609 #undef __FUNCT__
1610 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1611 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1612 {
1613   Mat         C;
1614   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1615   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1616 
1617   PetscFunctionBegin;
1618   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1619 
1620   *B = 0;
1621   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1622   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1623   c    = (Mat_SeqBAIJ*)C->data;
1624 
1625   c->bs         = a->bs;
1626   c->bs2        = a->bs2;
1627   c->mbs        = a->mbs;
1628   c->nbs        = a->nbs;
1629   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1630 
1631   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1632   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1633   for (i=0; i<mbs; i++) {
1634     c->imax[i] = a->imax[i];
1635     c->ilen[i] = a->ilen[i];
1636   }
1637 
1638   /* allocate the matrix space */
1639   c->singlemalloc = PETSC_TRUE;
1640   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1641   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1642   c->j = (int*)(c->a + nz*bs2);
1643   c->i = c->j + nz;
1644   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1645   if (mbs > 0) {
1646     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1647     if (cpvalues == MAT_COPY_VALUES) {
1648       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1649     } else {
1650       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1651     }
1652   }
1653 
1654   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1655   c->sorted      = a->sorted;
1656   c->roworiented = a->roworiented;
1657   c->nonew       = a->nonew;
1658 
1659   if (a->diag) {
1660     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1661     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1662     for (i=0; i<mbs; i++) {
1663       c->diag[i] = a->diag[i];
1664     }
1665   } else c->diag        = 0;
1666   c->nz                 = a->nz;
1667   c->maxnz              = a->maxnz;
1668   c->solve_work         = 0;
1669   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1670   c->mult_work          = 0;
1671   C->preallocated       = PETSC_TRUE;
1672   C->assembled          = PETSC_TRUE;
1673   *B = C;
1674   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 EXTERN_C_BEGIN
1679 #undef __FUNCT__
1680 #define __FUNCT__ "MatLoad_SeqBAIJ"
1681 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1682 {
1683   Mat_SeqBAIJ  *a;
1684   Mat          B;
1685   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1686   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1687   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1688   int          *masked,nmask,tmp,bs2,ishift;
1689   Scalar       *aa;
1690   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1691 
1692   PetscFunctionBegin;
1693   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1694   bs2  = bs*bs;
1695 
1696   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1697   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1698   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1699   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1700   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1701   M = header[1]; N = header[2]; nz = header[3];
1702 
1703   if (header[3] < 0) {
1704     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1705   }
1706 
1707   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1708 
1709   /*
1710      This code adds extra rows to make sure the number of rows is
1711     divisible by the blocksize
1712   */
1713   mbs        = M/bs;
1714   extra_rows = bs - M + bs*(mbs);
1715   if (extra_rows == bs) extra_rows = 0;
1716   else                  mbs++;
1717   if (extra_rows) {
1718     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1719   }
1720 
1721   /* read in row lengths */
1722   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1723   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1724   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1725 
1726   /* read in column indices */
1727   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1728   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1729   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1730 
1731   /* loop over row lengths determining block row lengths */
1732   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1733   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1734   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1735   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1736   masked   = mask + mbs;
1737   rowcount = 0; nzcount = 0;
1738   for (i=0; i<mbs; i++) {
1739     nmask = 0;
1740     for (j=0; j<bs; j++) {
1741       kmax = rowlengths[rowcount];
1742       for (k=0; k<kmax; k++) {
1743         tmp = jj[nzcount++]/bs;
1744         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1745       }
1746       rowcount++;
1747     }
1748     browlengths[i] += nmask;
1749     /* zero out the mask elements we set */
1750     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1751   }
1752 
1753   /* create our matrix */
1754   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1755   B = *A;
1756   a = (Mat_SeqBAIJ*)B->data;
1757 
1758   /* set matrix "i" values */
1759   a->i[0] = 0;
1760   for (i=1; i<= mbs; i++) {
1761     a->i[i]      = a->i[i-1] + browlengths[i-1];
1762     a->ilen[i-1] = browlengths[i-1];
1763   }
1764   a->nz         = 0;
1765   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1766 
1767   /* read in nonzero values */
1768   ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr);
1769   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1770   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1771 
1772   /* set "a" and "j" values into matrix */
1773   nzcount = 0; jcount = 0;
1774   for (i=0; i<mbs; i++) {
1775     nzcountb = nzcount;
1776     nmask    = 0;
1777     for (j=0; j<bs; j++) {
1778       kmax = rowlengths[i*bs+j];
1779       for (k=0; k<kmax; k++) {
1780         tmp = jj[nzcount++]/bs;
1781 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1782       }
1783     }
1784     /* sort the masked values */
1785     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1786 
1787     /* set "j" values into matrix */
1788     maskcount = 1;
1789     for (j=0; j<nmask; j++) {
1790       a->j[jcount++]  = masked[j];
1791       mask[masked[j]] = maskcount++;
1792     }
1793     /* set "a" values into matrix */
1794     ishift = bs2*a->i[i];
1795     for (j=0; j<bs; j++) {
1796       kmax = rowlengths[i*bs+j];
1797       for (k=0; k<kmax; k++) {
1798         tmp       = jj[nzcountb]/bs ;
1799         block     = mask[tmp] - 1;
1800         point     = jj[nzcountb] - bs*tmp;
1801         idx       = ishift + bs2*block + j + bs*point;
1802         a->a[idx] = (MatScalar)aa[nzcountb++];
1803       }
1804     }
1805     /* zero out the mask elements we set */
1806     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1807   }
1808   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1809 
1810   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1811   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1812   ierr = PetscFree(aa);CHKERRQ(ierr);
1813   ierr = PetscFree(jj);CHKERRQ(ierr);
1814   ierr = PetscFree(mask);CHKERRQ(ierr);
1815 
1816   B->assembled = PETSC_TRUE;
1817 
1818   ierr = MatView_Private(B);CHKERRQ(ierr);
1819   PetscFunctionReturn(0);
1820 }
1821 EXTERN_C_END
1822 
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatCreateSeqBAIJ"
1825 /*@C
1826    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1827    compressed row) format.  For good matrix assembly performance the
1828    user should preallocate the matrix storage by setting the parameter nz
1829    (or the array nnz).  By setting these parameters accurately, performance
1830    during matrix assembly can be increased by more than a factor of 50.
1831 
1832    Collective on MPI_Comm
1833 
1834    Input Parameters:
1835 +  comm - MPI communicator, set to PETSC_COMM_SELF
1836 .  bs - size of block
1837 .  m - number of rows
1838 .  n - number of columns
1839 .  nz - number of nonzero blocks  per block row (same for all rows)
1840 -  nnz - array containing the number of nonzero blocks in the various block rows
1841          (possibly different for each block row) or PETSC_NULL
1842 
1843    Output Parameter:
1844 .  A - the matrix
1845 
1846    Options Database Keys:
1847 .   -mat_no_unroll - uses code that does not unroll the loops in the
1848                      block calculations (much slower)
1849 .    -mat_block_size - size of the blocks to use
1850 
1851    Level: intermediate
1852 
1853    Notes:
1854    A nonzero block is any block that as 1 or more nonzeros in it
1855 
1856    The block AIJ format is fully compatible with standard Fortran 77
1857    storage.  That is, the stored row and column indices can begin at
1858    either one (as in Fortran) or zero.  See the users' manual for details.
1859 
1860    Specify the preallocated storage with either nz or nnz (not both).
1861    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1862    allocation.  For additional details, see the users manual chapter on
1863    matrices.
1864 
1865 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1866 @*/
1867 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1868 {
1869   int ierr;
1870 
1871   PetscFunctionBegin;
1872   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1873   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
1874   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 #undef __FUNCT__
1879 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
1880 /*@C
1881    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1882    per row in the matrix. For good matrix assembly performance the
1883    user should preallocate the matrix storage by setting the parameter nz
1884    (or the array nnz).  By setting these parameters accurately, performance
1885    during matrix assembly can be increased by more than a factor of 50.
1886 
1887    Collective on MPI_Comm
1888 
1889    Input Parameters:
1890 +  A - the matrix
1891 .  bs - size of block
1892 .  nz - number of block nonzeros per block row (same for all rows)
1893 -  nnz - array containing the number of block nonzeros in the various block rows
1894          (possibly different for each block row) or PETSC_NULL
1895 
1896    Options Database Keys:
1897 .   -mat_no_unroll - uses code that does not unroll the loops in the
1898                      block calculations (much slower)
1899 .    -mat_block_size - size of the blocks to use
1900 
1901    Level: intermediate
1902 
1903    Notes:
1904    The block AIJ format is fully compatible with standard Fortran 77
1905    storage.  That is, the stored row and column indices can begin at
1906    either one (as in Fortran) or zero.  See the users' manual for details.
1907 
1908    Specify the preallocated storage with either nz or nnz (not both).
1909    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1910    allocation.  For additional details, see the users manual chapter on
1911    matrices.
1912 
1913 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1914 @*/
1915 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1916 {
1917   Mat_SeqBAIJ *b;
1918   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1919   PetscTruth  flg;
1920 
1921   PetscFunctionBegin;
1922   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr);
1923   if (!flg) PetscFunctionReturn(0);
1924 
1925   B->preallocated = PETSC_TRUE;
1926   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1927   if (nnz && newbs != bs) {
1928     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1929   }
1930   bs   = newbs;
1931 
1932   mbs  = B->m/bs;
1933   nbs  = B->n/bs;
1934   bs2  = bs*bs;
1935 
1936   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1937     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1938   }
1939 
1940   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1941   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1942   if (nnz) {
1943     for (i=0; i<mbs; i++) {
1944       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1945       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1946     }
1947   }
1948 
1949   b       = (Mat_SeqBAIJ*)B->data;
1950   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1951   if (!flg) {
1952     switch (bs) {
1953     case 1:
1954       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1955       B->ops->solve           = MatSolve_SeqBAIJ_1;
1956       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_1;
1957       B->ops->mult            = MatMult_SeqBAIJ_1;
1958       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1959       break;
1960     case 2:
1961       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1962       B->ops->solve           = MatSolve_SeqBAIJ_2;
1963       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_2;
1964       B->ops->mult            = MatMult_SeqBAIJ_2;
1965       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1966       break;
1967     case 3:
1968       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1969       B->ops->solve           = MatSolve_SeqBAIJ_3;
1970       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_3;
1971       B->ops->mult            = MatMult_SeqBAIJ_3;
1972       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1973       break;
1974     case 4:
1975       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1976       B->ops->solve           = MatSolve_SeqBAIJ_4;
1977       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_4;
1978       B->ops->mult            = MatMult_SeqBAIJ_4;
1979       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1980       break;
1981     case 5:
1982       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1983       B->ops->solve           = MatSolve_SeqBAIJ_5;
1984       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_5;
1985       B->ops->mult            = MatMult_SeqBAIJ_5;
1986       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1987       break;
1988     case 6:
1989       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1990       B->ops->solve           = MatSolve_SeqBAIJ_6;
1991       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_6;
1992       B->ops->mult            = MatMult_SeqBAIJ_6;
1993       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1994       break;
1995     case 7:
1996       B->ops->mult            = MatMult_SeqBAIJ_7;
1997       B->ops->solve           = MatSolve_SeqBAIJ_7;
1998       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_7;
1999       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2000       break;
2001     default:
2002       B->ops->mult            = MatMult_SeqBAIJ_N;
2003       B->ops->solve           = MatSolve_SeqBAIJ_N;
2004       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2005       break;
2006     }
2007   }
2008   b->bs      = bs;
2009   b->mbs     = mbs;
2010   b->nbs     = nbs;
2011   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
2012   if (!nnz) {
2013     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2014     else if (nz <= 0)        nz = 1;
2015     for (i=0; i<mbs; i++) b->imax[i] = nz;
2016     nz = nz*mbs;
2017   } else {
2018     nz = 0;
2019     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2020   }
2021 
2022   /* allocate the matrix space */
2023   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
2024   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2025   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2026   b->j  = (int*)(b->a + nz*bs2);
2027   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
2028   b->i  = b->j + nz;
2029   b->singlemalloc = PETSC_TRUE;
2030 
2031   b->i[0] = 0;
2032   for (i=1; i<mbs+1; i++) {
2033     b->i[i] = b->i[i-1] + b->imax[i-1];
2034   }
2035 
2036   /* b->ilen will count nonzeros in each block row so far. */
2037   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
2038   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2039   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2040 
2041   b->bs               = bs;
2042   b->bs2              = bs2;
2043   b->mbs              = mbs;
2044   b->nz               = 0;
2045   b->maxnz            = nz*bs2;
2046   B->info.nz_unneeded = (PetscReal)b->maxnz;
2047   PetscFunctionReturn(0);
2048 }
2049 
2050