xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 54138f6b3f52447056571583551f7dcee5898a63)
1 /*$Id: baij.c,v 1.235 2001/07/12 01:48:59 buschelm Exp buschelm $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "petscsys.h"                     /*I "petscmat.h" I*/
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 
12 /*  UGLY, ugly, ugly
13    When MatScalar == Scalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
14    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
15    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
16    converts the entries into single precision and then calls ..._MatScalar() to put them
17    into the single precision data structures.
18 */
19 #if defined(PETSC_USE_MAT_SINGLE)
20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
21 #else
22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
23 #endif
24 
25 #define CHUNKSIZE  10
26 
27 /*
28      Checks for missing diagonals
29 */
30 #undef __FUNCT__
31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
32 int MatMissingDiagonal_SeqBAIJ(Mat A)
33 {
34   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
35   int         *diag,*jj = a->j,i,ierr;
36 
37   PetscFunctionBegin;
38   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
39   diag = a->diag;
40   for (i=0; i<a->mbs; i++) {
41     if (jj[diag[i]] != i) {
42       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
43     }
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
50 int MatMarkDiagonal_SeqBAIJ(Mat A)
51 {
52   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
53   int         i,j,*diag,m = a->mbs,ierr;
54 
55   PetscFunctionBegin;
56   if (a->diag) PetscFunctionReturn(0);
57 
58   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
59   PetscLogObjectMemory(A,(m+1)*sizeof(int));
60   for (i=0; i<m; i++) {
61     diag[i] = a->i[i+1];
62     for (j=a->i[i]; j<a->i[i+1]; j++) {
63       if (a->j[j] == i) {
64         diag[i] = j;
65         break;
66       }
67     }
68   }
69   a->diag = diag;
70   PetscFunctionReturn(0);
71 }
72 
73 
74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
79 {
80   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
81   int         ierr,n = a->mbs,i;
82 
83   PetscFunctionBegin;
84   *nn = n;
85   if (!ia) PetscFunctionReturn(0);
86   if (symmetric) {
87     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
88   } else if (oshift == 1) {
89     /* temporarily add 1 to i and j indices */
90     int nz = a->i[n];
91     for (i=0; i<nz; i++) a->j[i]++;
92     for (i=0; i<n+1; i++) a->i[i]++;
93     *ia = a->i; *ja = a->j;
94   } else {
95     *ia = a->i; *ja = a->j;
96   }
97 
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
104 {
105   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
106   int         i,n = a->mbs,ierr;
107 
108   PetscFunctionBegin;
109   if (!ia) PetscFunctionReturn(0);
110   if (symmetric) {
111     ierr = PetscFree(*ia);CHKERRQ(ierr);
112     ierr = PetscFree(*ja);CHKERRQ(ierr);
113   } else if (oshift == 1) {
114     int nz = a->i[n]-1;
115     for (i=0; i<nz; i++) a->j[i]--;
116     for (i=0; i<n+1; i++) a->i[i]--;
117   }
118   PetscFunctionReturn(0);
119 }
120 
121 #undef __FUNCT__
122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
124 {
125   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
126 
127   PetscFunctionBegin;
128   *bs = baij->bs;
129   PetscFunctionReturn(0);
130 }
131 
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatDestroy_SeqBAIJ"
135 int MatDestroy_SeqBAIJ(Mat A)
136 {
137   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
138   int         ierr;
139 
140   PetscFunctionBegin;
141 #if defined(PETSC_USE_LOG)
142   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
143 #endif
144   ierr = PetscFree(a->a);CHKERRQ(ierr);
145   if (!a->singlemalloc) {
146     ierr = PetscFree(a->i);CHKERRQ(ierr);
147     ierr = PetscFree(a->j);CHKERRQ(ierr);
148   }
149   if (a->row) {
150     ierr = ISDestroy(a->row);CHKERRQ(ierr);
151   }
152   if (a->col) {
153     ierr = ISDestroy(a->col);CHKERRQ(ierr);
154   }
155   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
156   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
157   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
158   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
159   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
160   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
161   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
162 #if defined(PETSC_USE_MAT_SINGLE)
163   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
164 #endif
165   ierr = PetscFree(a);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatSetOption_SeqBAIJ"
171 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
172 {
173   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
174 
175   PetscFunctionBegin;
176   switch (op) {
177   case MAT_ROW_ORIENTED:
178     a->roworiented    = PETSC_TRUE;
179     break;
180   case MAT_COLUMN_ORIENTED:
181     a->roworiented    = PETSC_FALSE;
182     break;
183   case MAT_COLUMNS_SORTED:
184     a->sorted         = PETSC_TRUE;
185     break;
186   case MAT_COLUMNS_UNSORTED:
187     a->sorted         = PETSC_FALSE;
188     break;
189   case MAT_KEEP_ZEROED_ROWS:
190     a->keepzeroedrows = PETSC_TRUE;
191     break;
192   case MAT_NO_NEW_NONZERO_LOCATIONS:
193     a->nonew          = 1;
194     break;
195   case MAT_NEW_NONZERO_LOCATION_ERR:
196     a->nonew          = -1;
197     break;
198   case MAT_NEW_NONZERO_ALLOCATION_ERR:
199     a->nonew          = -2;
200     break;
201   case MAT_YES_NEW_NONZERO_LOCATIONS:
202     a->nonew          = 0;
203     break;
204   case MAT_ROWS_SORTED:
205   case MAT_ROWS_UNSORTED:
206   case MAT_YES_NEW_DIAGONALS:
207   case MAT_IGNORE_OFF_PROC_ENTRIES:
208   case MAT_USE_HASH_TABLE:
209     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
210     break;
211   case MAT_NO_NEW_DIAGONALS:
212     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
213     break;
214   case MAT_USE_SINGLE_PRECISION_SOLVES:
215     if (a->bs==4) {
216       PetscTruth sse_enabled,single_prec,flg;
217       int        ierr;
218 
219       ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr);
220       ierr = PetscOptionsGetLogical(PETSC_NULL,"-mat_single_precision_solves",&single_prec,&flg);CHKERRQ(ierr);
221       if (sse_enabled && flg && single_prec) {
222           a->single_precision_solves = PETSC_TRUE;
223           A->ops->solve              = MatSolve_SeqBAIJ_Update;
224           A->ops->solvetranspose     = MatSolveTranspose_SeqBAIJ_Update;
225           break;
226       }
227       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
228     }
229     break;
230   default:
231     SETERRQ(PETSC_ERR_SUP,"unknown option");
232   }
233   PetscFunctionReturn(0);
234 }
235 
236 #undef __FUNCT__
237 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ"
238 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
239 {
240   PetscFunctionBegin;
241   if (m) *m = 0;
242   if (n) *n = A->m;
243   PetscFunctionReturn(0);
244 }
245 
246 #undef __FUNCT__
247 #define __FUNCT__ "MatGetRow_SeqBAIJ"
248 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
249 {
250   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
251   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
252   MatScalar    *aa,*aa_i;
253   Scalar       *v_i;
254 
255   PetscFunctionBegin;
256   bs  = a->bs;
257   ai  = a->i;
258   aj  = a->j;
259   aa  = a->a;
260   bs2 = a->bs2;
261 
262   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
263 
264   bn  = row/bs;   /* Block number */
265   bp  = row % bs; /* Block Position */
266   M   = ai[bn+1] - ai[bn];
267   *nz = bs*M;
268 
269   if (v) {
270     *v = 0;
271     if (*nz) {
272       ierr = PetscMalloc((*nz)*sizeof(Scalar),v);CHKERRQ(ierr);
273       for (i=0; i<M; i++) { /* for each block in the block row */
274         v_i  = *v + i*bs;
275         aa_i = aa + bs2*(ai[bn] + i);
276         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
277       }
278     }
279   }
280 
281   if (idx) {
282     *idx = 0;
283     if (*nz) {
284       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
285       for (i=0; i<M; i++) { /* for each block in the block row */
286         idx_i = *idx + i*bs;
287         itmp  = bs*aj[ai[bn] + i];
288         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
289       }
290     }
291   }
292   PetscFunctionReturn(0);
293 }
294 
295 #undef __FUNCT__
296 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
297 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
298 {
299   int ierr;
300 
301   PetscFunctionBegin;
302   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
303   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
304   PetscFunctionReturn(0);
305 }
306 
307 #undef __FUNCT__
308 #define __FUNCT__ "MatTranspose_SeqBAIJ"
309 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
310 {
311   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
312   Mat         C;
313   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
314   int         *rows,*cols,bs2=a->bs2;
315   Scalar      *array;
316 
317   PetscFunctionBegin;
318   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
319   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
320   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
321 
322 #if defined(PETSC_USE_MAT_SINGLE)
323   ierr = PetscMalloc(a->bs2*a->nz*sizeof(Scalar),&array);CHKERRQ(ierr);
324   for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i];
325 #else
326   array = a->a;
327 #endif
328 
329   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
330   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
331   ierr = PetscFree(col);CHKERRQ(ierr);
332   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
333   cols = rows + bs;
334   for (i=0; i<mbs; i++) {
335     cols[0] = i*bs;
336     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
337     len = ai[i+1] - ai[i];
338     for (j=0; j<len; j++) {
339       rows[0] = (*aj++)*bs;
340       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
341       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
342       array += bs2;
343     }
344   }
345   ierr = PetscFree(rows);CHKERRQ(ierr);
346 #if defined(PETSC_USE_MAT_SINGLE)
347   ierr = PetscFree(array);
348 #endif
349 
350   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
351   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
352 
353   if (B) {
354     *B = C;
355   } else {
356     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
357   }
358   PetscFunctionReturn(0);
359 }
360 
361 #undef __FUNCT__
362 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
363 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
364 {
365   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
366   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
367   Scalar      *aa;
368   FILE        *file;
369 
370   PetscFunctionBegin;
371   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
372   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
373   col_lens[0] = MAT_COOKIE;
374 
375   col_lens[1] = A->m;
376   col_lens[2] = A->n;
377   col_lens[3] = a->nz*bs2;
378 
379   /* store lengths of each row and write (including header) to file */
380   count = 0;
381   for (i=0; i<a->mbs; i++) {
382     for (j=0; j<bs; j++) {
383       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
384     }
385   }
386   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
387   ierr = PetscFree(col_lens);CHKERRQ(ierr);
388 
389   /* store column indices (zero start index) */
390   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
391   count = 0;
392   for (i=0; i<a->mbs; i++) {
393     for (j=0; j<bs; j++) {
394       for (k=a->i[i]; k<a->i[i+1]; k++) {
395         for (l=0; l<bs; l++) {
396           jj[count++] = bs*a->j[k] + l;
397         }
398       }
399     }
400   }
401   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
402   ierr = PetscFree(jj);CHKERRQ(ierr);
403 
404   /* store nonzero values */
405   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr);
406   count = 0;
407   for (i=0; i<a->mbs; i++) {
408     for (j=0; j<bs; j++) {
409       for (k=a->i[i]; k<a->i[i+1]; k++) {
410         for (l=0; l<bs; l++) {
411           aa[count++] = a->a[bs2*k + l*bs + j];
412         }
413       }
414     }
415   }
416   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
417   ierr = PetscFree(aa);CHKERRQ(ierr);
418 
419   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
420   if (file) {
421     fprintf(file,"-matload_block_size %d\n",a->bs);
422   }
423   PetscFunctionReturn(0);
424 }
425 
426 #undef __FUNCT__
427 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
428 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
429 {
430   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
431   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
432   PetscViewerFormat format;
433 
434   PetscFunctionBegin;
435   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
436   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
437     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
438   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
439     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
440   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
441     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
442     for (i=0; i<a->mbs; i++) {
443       for (j=0; j<bs; j++) {
444         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
445         for (k=a->i[i]; k<a->i[i+1]; k++) {
446           for (l=0; l<bs; l++) {
447 #if defined(PETSC_USE_COMPLEX)
448             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
449               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l,
450                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
451             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
452               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l,
453                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
454             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
455               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
456             }
457 #else
458             if (a->a[bs2*k + l*bs + j] != 0.0) {
459               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
460             }
461 #endif
462           }
463         }
464         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
465       }
466     }
467     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
468   } else {
469     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
470     for (i=0; i<a->mbs; i++) {
471       for (j=0; j<bs; j++) {
472         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
473         for (k=a->i[i]; k<a->i[i+1]; k++) {
474           for (l=0; l<bs; l++) {
475 #if defined(PETSC_USE_COMPLEX)
476             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
477               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
478                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
479             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
480               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
481                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
482             } else {
483               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
484             }
485 #else
486             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
487 #endif
488           }
489         }
490         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
491       }
492     }
493     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
494   }
495   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
496   PetscFunctionReturn(0);
497 }
498 
499 #undef __FUNCT__
500 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
501 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
502 {
503   Mat          A = (Mat) Aa;
504   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
505   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
506   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
507   MatScalar    *aa;
508   PetscViewer  viewer;
509 
510   PetscFunctionBegin;
511 
512   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
513   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
514 
515   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
516 
517   /* loop over matrix elements drawing boxes */
518   color = PETSC_DRAW_BLUE;
519   for (i=0,row=0; i<mbs; i++,row+=bs) {
520     for (j=a->i[i]; j<a->i[i+1]; j++) {
521       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
522       x_l = a->j[j]*bs; x_r = x_l + 1.0;
523       aa = a->a + j*bs2;
524       for (k=0; k<bs; k++) {
525         for (l=0; l<bs; l++) {
526           if (PetscRealPart(*aa++) >=  0.) continue;
527           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
528         }
529       }
530     }
531   }
532   color = PETSC_DRAW_CYAN;
533   for (i=0,row=0; i<mbs; i++,row+=bs) {
534     for (j=a->i[i]; j<a->i[i+1]; j++) {
535       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
536       x_l = a->j[j]*bs; x_r = x_l + 1.0;
537       aa = a->a + j*bs2;
538       for (k=0; k<bs; k++) {
539         for (l=0; l<bs; l++) {
540           if (PetscRealPart(*aa++) != 0.) continue;
541           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
542         }
543       }
544     }
545   }
546 
547   color = PETSC_DRAW_RED;
548   for (i=0,row=0; i<mbs; i++,row+=bs) {
549     for (j=a->i[i]; j<a->i[i+1]; j++) {
550       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
551       x_l = a->j[j]*bs; x_r = x_l + 1.0;
552       aa = a->a + j*bs2;
553       for (k=0; k<bs; k++) {
554         for (l=0; l<bs; l++) {
555           if (PetscRealPart(*aa++) <= 0.) continue;
556           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
557         }
558       }
559     }
560   }
561   PetscFunctionReturn(0);
562 }
563 
564 #undef __FUNCT__
565 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
566 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
567 {
568   int          ierr;
569   PetscReal    xl,yl,xr,yr,w,h;
570   PetscDraw    draw;
571   PetscTruth   isnull;
572 
573   PetscFunctionBegin;
574 
575   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
576   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
577 
578   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
579   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
580   xr += w;    yr += h;  xl = -w;     yl = -h;
581   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
582   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
583   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
584   PetscFunctionReturn(0);
585 }
586 
587 #undef __FUNCT__
588 #define __FUNCT__ "MatView_SeqBAIJ"
589 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
590 {
591   int        ierr;
592   PetscTruth issocket,isascii,isbinary,isdraw;
593 
594   PetscFunctionBegin;
595   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
596   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
597   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
598   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
599   if (issocket) {
600     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
601   } else if (isascii){
602     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
603   } else if (isbinary) {
604     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
605   } else if (isdraw) {
606     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
607   } else {
608     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
609   }
610   PetscFunctionReturn(0);
611 }
612 
613 
614 #undef __FUNCT__
615 #define __FUNCT__ "MatGetValues_SeqBAIJ"
616 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
617 {
618   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
619   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
620   int        *ai = a->i,*ailen = a->ilen;
621   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
622   MatScalar  *ap,*aa = a->a,zero = 0.0;
623 
624   PetscFunctionBegin;
625   for (k=0; k<m; k++) { /* loop over rows */
626     row  = im[k]; brow = row/bs;
627     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
628     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
629     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
630     nrow = ailen[brow];
631     for (l=0; l<n; l++) { /* loop over columns */
632       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
633       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
634       col  = in[l] ;
635       bcol = col/bs;
636       cidx = col%bs;
637       ridx = row%bs;
638       high = nrow;
639       low  = 0; /* assume unsorted */
640       while (high-low > 5) {
641         t = (low+high)/2;
642         if (rp[t] > bcol) high = t;
643         else             low  = t;
644       }
645       for (i=low; i<high; i++) {
646         if (rp[i] > bcol) break;
647         if (rp[i] == bcol) {
648           *v++ = ap[bs2*i+bs*cidx+ridx];
649           goto finished;
650         }
651       }
652       *v++ = zero;
653       finished:;
654     }
655   }
656   PetscFunctionReturn(0);
657 }
658 
659 #if defined(PETSC_USE_MAT_SINGLE)
660 #undef __FUNCT__
661 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
662 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
663 {
664   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
665   int         ierr,i,N = m*n*b->bs2;
666   MatScalar   *vsingle;
667 
668   PetscFunctionBegin;
669   if (N > b->setvalueslen) {
670     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
671     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
672     b->setvalueslen  = N;
673   }
674   vsingle = b->setvaluescopy;
675   for (i=0; i<N; i++) {
676     vsingle[i] = v[i];
677   }
678   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
679   PetscFunctionReturn(0);
680 }
681 #endif
682 
683 
684 #undef __FUNCT__
685 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
686 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
687 {
688   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
689   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
690   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
691   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
692   PetscTruth  roworiented=a->roworiented;
693   MatScalar   *value = v,*ap,*aa = a->a,*bap;
694 
695   PetscFunctionBegin;
696   if (roworiented) {
697     stepval = (n-1)*bs;
698   } else {
699     stepval = (m-1)*bs;
700   }
701   for (k=0; k<m; k++) { /* loop over added rows */
702     row  = im[k];
703     if (row < 0) continue;
704 #if defined(PETSC_USE_BOPT_g)
705     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
706 #endif
707     rp   = aj + ai[row];
708     ap   = aa + bs2*ai[row];
709     rmax = imax[row];
710     nrow = ailen[row];
711     low  = 0;
712     for (l=0; l<n; l++) { /* loop over added columns */
713       if (in[l] < 0) continue;
714 #if defined(PETSC_USE_BOPT_g)
715       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
716 #endif
717       col = in[l];
718       if (roworiented) {
719         value = v + k*(stepval+bs)*bs + l*bs;
720       } else {
721         value = v + l*(stepval+bs)*bs + k*bs;
722       }
723       if (!sorted) low = 0; high = nrow;
724       while (high-low > 7) {
725         t = (low+high)/2;
726         if (rp[t] > col) high = t;
727         else             low  = t;
728       }
729       for (i=low; i<high; i++) {
730         if (rp[i] > col) break;
731         if (rp[i] == col) {
732           bap  = ap +  bs2*i;
733           if (roworiented) {
734             if (is == ADD_VALUES) {
735               for (ii=0; ii<bs; ii++,value+=stepval) {
736                 for (jj=ii; jj<bs2; jj+=bs) {
737                   bap[jj] += *value++;
738                 }
739               }
740             } else {
741               for (ii=0; ii<bs; ii++,value+=stepval) {
742                 for (jj=ii; jj<bs2; jj+=bs) {
743                   bap[jj] = *value++;
744                 }
745               }
746             }
747           } else {
748             if (is == ADD_VALUES) {
749               for (ii=0; ii<bs; ii++,value+=stepval) {
750                 for (jj=0; jj<bs; jj++) {
751                   *bap++ += *value++;
752                 }
753               }
754             } else {
755               for (ii=0; ii<bs; ii++,value+=stepval) {
756                 for (jj=0; jj<bs; jj++) {
757                   *bap++  = *value++;
758                 }
759               }
760             }
761           }
762           goto noinsert2;
763         }
764       }
765       if (nonew == 1) goto noinsert2;
766       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
767       if (nrow >= rmax) {
768         /* there is no extra room in row, therefore enlarge */
769         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
770         MatScalar *new_a;
771 
772         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
773 
774         /* malloc new storage space */
775         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
776 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
777         new_j   = (int*)(new_a + bs2*new_nz);
778         new_i   = new_j + new_nz;
779 
780         /* copy over old data into new slots */
781         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
782         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
783         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
784         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
785         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
786         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
787         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
788         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
789         /* free up old matrix storage */
790         ierr = PetscFree(a->a);CHKERRQ(ierr);
791         if (!a->singlemalloc) {
792           ierr = PetscFree(a->i);CHKERRQ(ierr);
793           ierr = PetscFree(a->j);CHKERRQ(ierr);
794         }
795         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
796         a->singlemalloc = PETSC_TRUE;
797 
798         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
799         rmax = imax[row] = imax[row] + CHUNKSIZE;
800         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
801         a->maxnz += bs2*CHUNKSIZE;
802         a->reallocs++;
803         a->nz++;
804       }
805       N = nrow++ - 1;
806       /* shift up all the later entries in this row */
807       for (ii=N; ii>=i; ii--) {
808         rp[ii+1] = rp[ii];
809         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
810       }
811       if (N >= i) {
812         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
813       }
814       rp[i] = col;
815       bap   = ap +  bs2*i;
816       if (roworiented) {
817         for (ii=0; ii<bs; ii++,value+=stepval) {
818           for (jj=ii; jj<bs2; jj+=bs) {
819             bap[jj] = *value++;
820           }
821         }
822       } else {
823         for (ii=0; ii<bs; ii++,value+=stepval) {
824           for (jj=0; jj<bs; jj++) {
825             *bap++  = *value++;
826           }
827         }
828       }
829       noinsert2:;
830       low = i;
831     }
832     ailen[row] = nrow;
833   }
834   PetscFunctionReturn(0);
835 }
836 
837 
838 #undef __FUNCT__
839 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
840 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
841 {
842   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
843   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
844   int        m = A->m,*ip,N,*ailen = a->ilen;
845   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
846   MatScalar  *aa = a->a,*ap;
847 
848   PetscFunctionBegin;
849   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
850 
851   if (m) rmax = ailen[0];
852   for (i=1; i<mbs; i++) {
853     /* move each row back by the amount of empty slots (fshift) before it*/
854     fshift += imax[i-1] - ailen[i-1];
855     rmax   = PetscMax(rmax,ailen[i]);
856     if (fshift) {
857       ip = aj + ai[i]; ap = aa + bs2*ai[i];
858       N = ailen[i];
859       for (j=0; j<N; j++) {
860         ip[j-fshift] = ip[j];
861         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
862       }
863     }
864     ai[i] = ai[i-1] + ailen[i-1];
865   }
866   if (mbs) {
867     fshift += imax[mbs-1] - ailen[mbs-1];
868     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
869   }
870   /* reset ilen and imax for each row */
871   for (i=0; i<mbs; i++) {
872     ailen[i] = imax[i] = ai[i+1] - ai[i];
873   }
874   a->nz = ai[mbs];
875 
876   /* diagonals may have moved, so kill the diagonal pointers */
877   if (fshift && a->diag) {
878     ierr = PetscFree(a->diag);CHKERRQ(ierr);
879     PetscLogObjectMemory(A,-(m+1)*sizeof(int));
880     a->diag = 0;
881   }
882   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
883            m,A->n,a->bs,fshift*bs2,a->nz*bs2);
884   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
885            a->reallocs);
886   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
887   a->reallocs          = 0;
888   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
889 
890   PetscFunctionReturn(0);
891 }
892 
893 
894 
895 /*
896    This function returns an array of flags which indicate the locations of contiguous
897    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
898    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
899    Assume: sizes should be long enough to hold all the values.
900 */
901 #undef __FUNCT__
902 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
903 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
904 {
905   int        i,j,k,row;
906   PetscTruth flg;
907 
908   PetscFunctionBegin;
909   for (i=0,j=0; i<n; j++) {
910     row = idx[i];
911     if (row%bs!=0) { /* Not the begining of a block */
912       sizes[j] = 1;
913       i++;
914     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
915       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
916       i++;
917     } else { /* Begining of the block, so check if the complete block exists */
918       flg = PETSC_TRUE;
919       for (k=1; k<bs; k++) {
920         if (row+k != idx[i+k]) { /* break in the block */
921           flg = PETSC_FALSE;
922           break;
923         }
924       }
925       if (flg == PETSC_TRUE) { /* No break in the bs */
926         sizes[j] = bs;
927         i+= bs;
928       } else {
929         sizes[j] = 1;
930         i++;
931       }
932     }
933   }
934   *bs_max = j;
935   PetscFunctionReturn(0);
936 }
937 
938 #undef __FUNCT__
939 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
940 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag)
941 {
942   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
943   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
944   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
945   Scalar      zero = 0.0;
946   MatScalar   *aa;
947 
948   PetscFunctionBegin;
949   /* Make a copy of the IS and  sort it */
950   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
951   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
952 
953   /* allocate memory for rows,sizes */
954   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
955   sizes = rows + is_n;
956 
957   /* copy IS values to rows, and sort them */
958   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
959   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
960   if (baij->keepzeroedrows) {
961     for (i=0; i<is_n; i++) { sizes[i] = 1; }
962     bs_max = is_n;
963   } else {
964     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
965   }
966   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
967 
968   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
969     row   = rows[j];
970     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
971     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
972     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
973     if (sizes[i] == bs && !baij->keepzeroedrows) {
974       if (diag) {
975         if (baij->ilen[row/bs] > 0) {
976           baij->ilen[row/bs]       = 1;
977           baij->j[baij->i[row/bs]] = row/bs;
978           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
979         }
980         /* Now insert all the diagonal values for this bs */
981         for (k=0; k<bs; k++) {
982           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
983         }
984       } else { /* (!diag) */
985         baij->ilen[row/bs] = 0;
986       } /* end (!diag) */
987     } else { /* (sizes[i] != bs) */
988 #if defined (PETSC_USE_DEBUG)
989       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
990 #endif
991       for (k=0; k<count; k++) {
992         aa[0] =  zero;
993         aa    += bs;
994       }
995       if (diag) {
996         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
997       }
998     }
999   }
1000 
1001   ierr = PetscFree(rows);CHKERRQ(ierr);
1002   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 #undef __FUNCT__
1007 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1008 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
1009 {
1010   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1011   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1012   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1013   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1014   int         ridx,cidx,bs2=a->bs2,ierr;
1015   PetscTruth  roworiented=a->roworiented;
1016   MatScalar   *ap,value,*aa=a->a,*bap;
1017 
1018   PetscFunctionBegin;
1019   for (k=0; k<m; k++) { /* loop over added rows */
1020     row  = im[k]; brow = row/bs;
1021     if (row < 0) continue;
1022 #if defined(PETSC_USE_BOPT_g)
1023     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1024 #endif
1025     rp   = aj + ai[brow];
1026     ap   = aa + bs2*ai[brow];
1027     rmax = imax[brow];
1028     nrow = ailen[brow];
1029     low  = 0;
1030     for (l=0; l<n; l++) { /* loop over added columns */
1031       if (in[l] < 0) continue;
1032 #if defined(PETSC_USE_BOPT_g)
1033       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1034 #endif
1035       col = in[l]; bcol = col/bs;
1036       ridx = row % bs; cidx = col % bs;
1037       if (roworiented) {
1038         value = v[l + k*n];
1039       } else {
1040         value = v[k + l*m];
1041       }
1042       if (!sorted) low = 0; high = nrow;
1043       while (high-low > 7) {
1044         t = (low+high)/2;
1045         if (rp[t] > bcol) high = t;
1046         else              low  = t;
1047       }
1048       for (i=low; i<high; i++) {
1049         if (rp[i] > bcol) break;
1050         if (rp[i] == bcol) {
1051           bap  = ap +  bs2*i + bs*cidx + ridx;
1052           if (is == ADD_VALUES) *bap += value;
1053           else                  *bap  = value;
1054           goto noinsert1;
1055         }
1056       }
1057       if (nonew == 1) goto noinsert1;
1058       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1059       if (nrow >= rmax) {
1060         /* there is no extra room in row, therefore enlarge */
1061         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1062         MatScalar *new_a;
1063 
1064         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1065 
1066         /* Malloc new storage space */
1067         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1068 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1069         new_j   = (int*)(new_a + bs2*new_nz);
1070         new_i   = new_j + new_nz;
1071 
1072         /* copy over old data into new slots */
1073         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1074         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1075         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1076         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1077         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1078         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1079         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1080         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1081         /* free up old matrix storage */
1082         ierr = PetscFree(a->a);CHKERRQ(ierr);
1083         if (!a->singlemalloc) {
1084           ierr = PetscFree(a->i);CHKERRQ(ierr);
1085           ierr = PetscFree(a->j);CHKERRQ(ierr);
1086         }
1087         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1088         a->singlemalloc = PETSC_TRUE;
1089 
1090         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1091         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1092         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1093         a->maxnz += bs2*CHUNKSIZE;
1094         a->reallocs++;
1095         a->nz++;
1096       }
1097       N = nrow++ - 1;
1098       /* shift up all the later entries in this row */
1099       for (ii=N; ii>=i; ii--) {
1100         rp[ii+1] = rp[ii];
1101         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1102       }
1103       if (N>=i) {
1104         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1105       }
1106       rp[i]                      = bcol;
1107       ap[bs2*i + bs*cidx + ridx] = value;
1108       noinsert1:;
1109       low = i;
1110     }
1111     ailen[brow] = nrow;
1112   }
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1119 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1120 {
1121   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1122   Mat         outA;
1123   int         ierr;
1124   PetscTruth  row_identity,col_identity;
1125 
1126   PetscFunctionBegin;
1127   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1128   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1129   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1130   if (!row_identity || !col_identity) {
1131     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1132   }
1133 
1134   outA          = inA;
1135   inA->factor   = FACTOR_LU;
1136 
1137   if (!a->diag) {
1138     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1139   }
1140   /*
1141       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1142       for ILU(0) factorization with natural ordering
1143   */
1144   if (a->bs < 8) {
1145     ierr = MatSeqBAIJ_UpdateFactor_NaturalOrdering(inA);CHKERRQ(ierr);
1146   } else {
1147     a->row        = row;
1148     a->col        = col;
1149     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1150     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1151 
1152     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1153     ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1154     PetscLogObjectParent(inA,a->icol);
1155 
1156     if (!a->solve_work) {
1157       ierr = PetscMalloc((inA->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr);
1158       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(Scalar));
1159     }
1160   }
1161 
1162   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1163 
1164   PetscFunctionReturn(0);
1165 }
1166 #undef __FUNCT__
1167 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1168 int MatPrintHelp_SeqBAIJ(Mat A)
1169 {
1170   static PetscTruth called = PETSC_FALSE;
1171   MPI_Comm          comm = A->comm;
1172   int               ierr;
1173 
1174   PetscFunctionBegin;
1175   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1176   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1177   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 EXTERN_C_BEGIN
1182 #undef __FUNCT__
1183 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1184 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1185 {
1186   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1187   int         i,nz,n;
1188 
1189   PetscFunctionBegin;
1190   nz = baij->maxnz;
1191   n  = mat->n;
1192   for (i=0; i<nz; i++) {
1193     baij->j[i] = indices[i];
1194   }
1195   baij->nz = nz;
1196   for (i=0; i<n; i++) {
1197     baij->ilen[i] = baij->imax[i];
1198   }
1199 
1200   PetscFunctionReturn(0);
1201 }
1202 EXTERN_C_END
1203 
1204 #undef __FUNCT__
1205 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1206 /*@
1207     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1208        in the matrix.
1209 
1210   Input Parameters:
1211 +  mat - the SeqBAIJ matrix
1212 -  indices - the column indices
1213 
1214   Level: advanced
1215 
1216   Notes:
1217     This can be called if you have precomputed the nonzero structure of the
1218   matrix and want to provide it to the matrix object to improve the performance
1219   of the MatSetValues() operation.
1220 
1221     You MUST have set the correct numbers of nonzeros per row in the call to
1222   MatCreateSeqBAIJ().
1223 
1224     MUST be called before any calls to MatSetValues();
1225 
1226 @*/
1227 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1228 {
1229   int ierr,(*f)(Mat,int *);
1230 
1231   PetscFunctionBegin;
1232   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1233   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr);
1234   if (f) {
1235     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1236   } else {
1237     SETERRQ(1,"Wrong type of matrix to set column indices");
1238   }
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1244 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1245 {
1246   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1247   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1248   PetscReal    atmp;
1249   Scalar       *x,zero = 0.0;
1250   MatScalar    *aa;
1251   int          ncols,brow,krow,kcol;
1252 
1253   PetscFunctionBegin;
1254   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1255   bs   = a->bs;
1256   aa   = a->a;
1257   ai   = a->i;
1258   aj   = a->j;
1259   mbs = a->mbs;
1260 
1261   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1262   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1263   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1264   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1265   for (i=0; i<mbs; i++) {
1266     ncols = ai[1] - ai[0]; ai++;
1267     brow  = bs*i;
1268     for (j=0; j<ncols; j++){
1269       /* bcol = bs*(*aj); */
1270       for (kcol=0; kcol<bs; kcol++){
1271         for (krow=0; krow<bs; krow++){
1272           atmp = PetscAbsScalar(*aa); aa++;
1273           row = brow + krow;    /* row index */
1274           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1275           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1276         }
1277       }
1278       aj++;
1279     }
1280   }
1281   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 #undef __FUNCT__
1286 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1287 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1288 {
1289   int        ierr;
1290 
1291   PetscFunctionBegin;
1292   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 #undef __FUNCT__
1297 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1298 int MatGetArray_SeqBAIJ(Mat A,Scalar **array)
1299 {
1300   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1301   PetscFunctionBegin;
1302   *array = a->a;
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1308 int MatRestoreArray_SeqBAIJ(Mat A,Scalar **array)
1309 {
1310   PetscFunctionBegin;
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 /* -------------------------------------------------------------------*/
1315 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1316        MatGetRow_SeqBAIJ,
1317        MatRestoreRow_SeqBAIJ,
1318        MatMult_SeqBAIJ_N,
1319        MatMultAdd_SeqBAIJ_N,
1320        MatMultTranspose_SeqBAIJ,
1321        MatMultTransposeAdd_SeqBAIJ,
1322        MatSolve_SeqBAIJ_N,
1323        0,
1324        0,
1325        0,
1326        MatLUFactor_SeqBAIJ,
1327        0,
1328        0,
1329        MatTranspose_SeqBAIJ,
1330        MatGetInfo_SeqBAIJ,
1331        MatEqual_SeqBAIJ,
1332        MatGetDiagonal_SeqBAIJ,
1333        MatDiagonalScale_SeqBAIJ,
1334        MatNorm_SeqBAIJ,
1335        0,
1336        MatAssemblyEnd_SeqBAIJ,
1337        0,
1338        MatSetOption_SeqBAIJ,
1339        MatZeroEntries_SeqBAIJ,
1340        MatZeroRows_SeqBAIJ,
1341        MatLUFactorSymbolic_SeqBAIJ,
1342        MatLUFactorNumeric_SeqBAIJ_N,
1343        0,
1344        0,
1345        MatSetUpPreallocation_SeqBAIJ,
1346        0,
1347        MatGetOwnershipRange_SeqBAIJ,
1348        MatILUFactorSymbolic_SeqBAIJ,
1349        0,
1350        MatGetArray_SeqBAIJ,
1351        MatRestoreArray_SeqBAIJ,
1352        MatDuplicate_SeqBAIJ,
1353        0,
1354        0,
1355        MatILUFactor_SeqBAIJ,
1356        0,
1357        0,
1358        MatGetSubMatrices_SeqBAIJ,
1359        MatIncreaseOverlap_SeqBAIJ,
1360        MatGetValues_SeqBAIJ,
1361        0,
1362        MatPrintHelp_SeqBAIJ,
1363        MatScale_SeqBAIJ,
1364        0,
1365        0,
1366        0,
1367        MatGetBlockSize_SeqBAIJ,
1368        MatGetRowIJ_SeqBAIJ,
1369        MatRestoreRowIJ_SeqBAIJ,
1370        0,
1371        0,
1372        0,
1373        0,
1374        0,
1375        0,
1376        MatSetValuesBlocked_SeqBAIJ,
1377        MatGetSubMatrix_SeqBAIJ,
1378        MatDestroy_SeqBAIJ,
1379        MatView_SeqBAIJ,
1380        MatGetMaps_Petsc,
1381        0,
1382        0,
1383        0,
1384        0,
1385        0,
1386        0,
1387        MatGetRowMax_SeqBAIJ,
1388        MatConvert_Basic};
1389 
1390 EXTERN_C_BEGIN
1391 #undef __FUNCT__
1392 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1393 int MatStoreValues_SeqBAIJ(Mat mat)
1394 {
1395   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1396   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1397   int         ierr;
1398 
1399   PetscFunctionBegin;
1400   if (aij->nonew != 1) {
1401     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1402   }
1403 
1404   /* allocate space for values if not already there */
1405   if (!aij->saved_values) {
1406     ierr = PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr);
1407   }
1408 
1409   /* copy values over */
1410   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1411   PetscFunctionReturn(0);
1412 }
1413 EXTERN_C_END
1414 
1415 EXTERN_C_BEGIN
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1418 int MatRetrieveValues_SeqBAIJ(Mat mat)
1419 {
1420   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1421   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1422 
1423   PetscFunctionBegin;
1424   if (aij->nonew != 1) {
1425     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1426   }
1427   if (!aij->saved_values) {
1428     SETERRQ(1,"Must call MatStoreValues(A);first");
1429   }
1430 
1431   /* copy values over */
1432   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1433   PetscFunctionReturn(0);
1434 }
1435 EXTERN_C_END
1436 
1437 EXTERN_C_BEGIN
1438 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1439 EXTERN_C_END
1440 
1441 EXTERN_C_BEGIN
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatCreate_SeqBAIJ"
1444 int MatCreate_SeqBAIJ(Mat B)
1445 {
1446   int         ierr,size;
1447   Mat_SeqBAIJ *b;
1448 
1449   PetscFunctionBegin;
1450   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1451   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1452 
1453   B->m = B->M = PetscMax(B->m,B->M);
1454   B->n = B->N = PetscMax(B->n,B->N);
1455   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1456   B->data = (void*)b;
1457   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1458   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1459   B->factor           = 0;
1460   B->lupivotthreshold = 1.0;
1461   B->mapping          = 0;
1462   b->row              = 0;
1463   b->col              = 0;
1464   b->icol             = 0;
1465   b->reallocs         = 0;
1466   b->saved_values     = 0;
1467 #if defined(PETSC_USE_MAT_SINGLE)
1468   b->setvalueslen     = 0;
1469   b->setvaluescopy    = PETSC_NULL;
1470 #endif
1471   b->single_precision_solves = PETSC_FALSE;
1472 
1473   ierr = MapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1474   ierr = MapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1475 
1476   b->sorted           = PETSC_FALSE;
1477   b->roworiented      = PETSC_TRUE;
1478   b->nonew            = 0;
1479   b->diag             = 0;
1480   b->solve_work       = 0;
1481   b->mult_work        = 0;
1482   b->spptr            = 0;
1483   B->info.nz_unneeded = (PetscReal)b->maxnz;
1484   b->keepzeroedrows   = PETSC_FALSE;
1485 
1486   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1487                                      "MatStoreValues_SeqBAIJ",
1488                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1489   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1490                                      "MatRetrieveValues_SeqBAIJ",
1491                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1492   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1493                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1494                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1495   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1496                                      "MatConvert_SeqBAIJ_SeqAIJ",
1497                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1498   PetscFunctionReturn(0);
1499 }
1500 EXTERN_C_END
1501 
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1504 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1505 {
1506   Mat         C;
1507   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1508   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1509 
1510   PetscFunctionBegin;
1511   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1512 
1513   *B = 0;
1514   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1515   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1516   c    = (Mat_SeqBAIJ*)C->data;
1517 
1518   c->bs         = a->bs;
1519   c->bs2        = a->bs2;
1520   c->mbs        = a->mbs;
1521   c->nbs        = a->nbs;
1522   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1523 
1524   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1525   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1526   for (i=0; i<mbs; i++) {
1527     c->imax[i] = a->imax[i];
1528     c->ilen[i] = a->ilen[i];
1529   }
1530 
1531   /* allocate the matrix space */
1532   c->singlemalloc = PETSC_TRUE;
1533   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1534   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1535   c->j = (int*)(c->a + nz*bs2);
1536   c->i = c->j + nz;
1537   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1538   if (mbs > 0) {
1539     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1540     if (cpvalues == MAT_COPY_VALUES) {
1541       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1542     } else {
1543       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1544     }
1545   }
1546 
1547   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1548   c->sorted      = a->sorted;
1549   c->roworiented = a->roworiented;
1550   c->nonew       = a->nonew;
1551 
1552   if (a->diag) {
1553     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1554     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1555     for (i=0; i<mbs; i++) {
1556       c->diag[i] = a->diag[i];
1557     }
1558   } else c->diag        = 0;
1559   c->nz                 = a->nz;
1560   c->maxnz              = a->maxnz;
1561   c->solve_work         = 0;
1562   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1563   c->mult_work          = 0;
1564   C->preallocated       = PETSC_TRUE;
1565   C->assembled          = PETSC_TRUE;
1566   *B = C;
1567   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 EXTERN_C_BEGIN
1572 #undef __FUNCT__
1573 #define __FUNCT__ "MatLoad_SeqBAIJ"
1574 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1575 {
1576   Mat_SeqBAIJ  *a;
1577   Mat          B;
1578   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1579   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1580   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1581   int          *masked,nmask,tmp,bs2,ishift;
1582   Scalar       *aa;
1583   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1584 
1585   PetscFunctionBegin;
1586   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1587   bs2  = bs*bs;
1588 
1589   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1590   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1591   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1592   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1593   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1594   M = header[1]; N = header[2]; nz = header[3];
1595 
1596   if (header[3] < 0) {
1597     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1598   }
1599 
1600   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1601 
1602   /*
1603      This code adds extra rows to make sure the number of rows is
1604     divisible by the blocksize
1605   */
1606   mbs        = M/bs;
1607   extra_rows = bs - M + bs*(mbs);
1608   if (extra_rows == bs) extra_rows = 0;
1609   else                  mbs++;
1610   if (extra_rows) {
1611     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1612   }
1613 
1614   /* read in row lengths */
1615   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1616   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1617   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1618 
1619   /* read in column indices */
1620   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1621   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1622   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1623 
1624   /* loop over row lengths determining block row lengths */
1625   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1626   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1627   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1628   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1629   masked   = mask + mbs;
1630   rowcount = 0; nzcount = 0;
1631   for (i=0; i<mbs; i++) {
1632     nmask = 0;
1633     for (j=0; j<bs; j++) {
1634       kmax = rowlengths[rowcount];
1635       for (k=0; k<kmax; k++) {
1636         tmp = jj[nzcount++]/bs;
1637         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1638       }
1639       rowcount++;
1640     }
1641     browlengths[i] += nmask;
1642     /* zero out the mask elements we set */
1643     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1644   }
1645 
1646   /* create our matrix */
1647   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1648   B = *A;
1649   a = (Mat_SeqBAIJ*)B->data;
1650 
1651   /* set matrix "i" values */
1652   a->i[0] = 0;
1653   for (i=1; i<= mbs; i++) {
1654     a->i[i]      = a->i[i-1] + browlengths[i-1];
1655     a->ilen[i-1] = browlengths[i-1];
1656   }
1657   a->nz         = 0;
1658   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1659 
1660   /* read in nonzero values */
1661   ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr);
1662   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1663   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1664 
1665   /* set "a" and "j" values into matrix */
1666   nzcount = 0; jcount = 0;
1667   for (i=0; i<mbs; i++) {
1668     nzcountb = nzcount;
1669     nmask    = 0;
1670     for (j=0; j<bs; j++) {
1671       kmax = rowlengths[i*bs+j];
1672       for (k=0; k<kmax; k++) {
1673         tmp = jj[nzcount++]/bs;
1674 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1675       }
1676     }
1677     /* sort the masked values */
1678     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1679 
1680     /* set "j" values into matrix */
1681     maskcount = 1;
1682     for (j=0; j<nmask; j++) {
1683       a->j[jcount++]  = masked[j];
1684       mask[masked[j]] = maskcount++;
1685     }
1686     /* set "a" values into matrix */
1687     ishift = bs2*a->i[i];
1688     for (j=0; j<bs; j++) {
1689       kmax = rowlengths[i*bs+j];
1690       for (k=0; k<kmax; k++) {
1691         tmp       = jj[nzcountb]/bs ;
1692         block     = mask[tmp] - 1;
1693         point     = jj[nzcountb] - bs*tmp;
1694         idx       = ishift + bs2*block + j + bs*point;
1695         a->a[idx] = (MatScalar)aa[nzcountb++];
1696       }
1697     }
1698     /* zero out the mask elements we set */
1699     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1700   }
1701   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1702 
1703   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1704   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1705   ierr = PetscFree(aa);CHKERRQ(ierr);
1706   ierr = PetscFree(jj);CHKERRQ(ierr);
1707   ierr = PetscFree(mask);CHKERRQ(ierr);
1708 
1709   B->assembled = PETSC_TRUE;
1710 
1711   ierr = MatView_Private(B);CHKERRQ(ierr);
1712   PetscFunctionReturn(0);
1713 }
1714 EXTERN_C_END
1715 
1716 #undef __FUNCT__
1717 #define __FUNCT__ "MatCreateSeqBAIJ"
1718 /*@C
1719    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1720    compressed row) format.  For good matrix assembly performance the
1721    user should preallocate the matrix storage by setting the parameter nz
1722    (or the array nnz).  By setting these parameters accurately, performance
1723    during matrix assembly can be increased by more than a factor of 50.
1724 
1725    Collective on MPI_Comm
1726 
1727    Input Parameters:
1728 +  comm - MPI communicator, set to PETSC_COMM_SELF
1729 .  bs - size of block
1730 .  m - number of rows
1731 .  n - number of columns
1732 .  nz - number of nonzero blocks  per block row (same for all rows)
1733 -  nnz - array containing the number of nonzero blocks in the various block rows
1734          (possibly different for each block row) or PETSC_NULL
1735 
1736    Output Parameter:
1737 .  A - the matrix
1738 
1739    Options Database Keys:
1740 .   -mat_no_unroll - uses code that does not unroll the loops in the
1741                      block calculations (much slower)
1742 .    -mat_block_size - size of the blocks to use
1743 
1744    Level: intermediate
1745 
1746    Notes:
1747    A nonzero block is any block that as 1 or more nonzeros in it
1748 
1749    The block AIJ format is fully compatible with standard Fortran 77
1750    storage.  That is, the stored row and column indices can begin at
1751    either one (as in Fortran) or zero.  See the users' manual for details.
1752 
1753    Specify the preallocated storage with either nz or nnz (not both).
1754    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1755    allocation.  For additional details, see the users manual chapter on
1756    matrices.
1757 
1758 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1759 @*/
1760 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1761 {
1762   int ierr;
1763 
1764   PetscFunctionBegin;
1765   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1766   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
1767   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1768   PetscFunctionReturn(0);
1769 }
1770 
1771 #undef __FUNCT__
1772 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
1773 /*@C
1774    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1775    per row in the matrix. For good matrix assembly performance the
1776    user should preallocate the matrix storage by setting the parameter nz
1777    (or the array nnz).  By setting these parameters accurately, performance
1778    during matrix assembly can be increased by more than a factor of 50.
1779 
1780    Collective on MPI_Comm
1781 
1782    Input Parameters:
1783 +  A - the matrix
1784 .  bs - size of block
1785 .  nz - number of block nonzeros per block row (same for all rows)
1786 -  nnz - array containing the number of block nonzeros in the various block rows
1787          (possibly different for each block row) or PETSC_NULL
1788 
1789    Options Database Keys:
1790 .   -mat_no_unroll - uses code that does not unroll the loops in the
1791                      block calculations (much slower)
1792 .    -mat_block_size - size of the blocks to use
1793 
1794    Level: intermediate
1795 
1796    Notes:
1797    The block AIJ format is fully compatible with standard Fortran 77
1798    storage.  That is, the stored row and column indices can begin at
1799    either one (as in Fortran) or zero.  See the users' manual for details.
1800 
1801    Specify the preallocated storage with either nz or nnz (not both).
1802    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1803    allocation.  For additional details, see the users manual chapter on
1804    matrices.
1805 
1806 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1807 @*/
1808 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1809 {
1810   Mat_SeqBAIJ *b;
1811   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1812   PetscTruth  flg;
1813 
1814   PetscFunctionBegin;
1815   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr);
1816   if (!flg) PetscFunctionReturn(0);
1817 
1818   B->preallocated = PETSC_TRUE;
1819   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1820   if (nnz && newbs != bs) {
1821     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1822   }
1823   bs   = newbs;
1824 
1825   mbs  = B->m/bs;
1826   nbs  = B->n/bs;
1827   bs2  = bs*bs;
1828 
1829   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1830     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1831   }
1832 
1833   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1834   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1835   if (nnz) {
1836     for (i=0; i<mbs; i++) {
1837       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1838       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1839     }
1840   }
1841 
1842   b       = (Mat_SeqBAIJ*)B->data;
1843   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1844   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1845   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1846   if (!flg) {
1847     switch (bs) {
1848     case 1:
1849       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1850       B->ops->mult            = MatMult_SeqBAIJ_1;
1851       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1852       break;
1853     case 2:
1854       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1855       B->ops->mult            = MatMult_SeqBAIJ_2;
1856       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1857       break;
1858     case 3:
1859       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1860       B->ops->mult            = MatMult_SeqBAIJ_3;
1861       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1862       break;
1863     case 4:
1864       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1865       B->ops->mult            = MatMult_SeqBAIJ_4;
1866       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1867       break;
1868     case 5:
1869       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1870       B->ops->mult            = MatMult_SeqBAIJ_5;
1871       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1872       break;
1873     case 6:
1874       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1875       B->ops->mult            = MatMult_SeqBAIJ_6;
1876       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1877       break;
1878     case 7:
1879       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1880       B->ops->mult            = MatMult_SeqBAIJ_7;
1881       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1882       break;
1883     default:
1884       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1885       B->ops->mult            = MatMult_SeqBAIJ_N;
1886       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1887       break;
1888     }
1889   }
1890   b->bs      = bs;
1891   b->mbs     = mbs;
1892   b->nbs     = nbs;
1893   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1894   if (!nnz) {
1895     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1896     else if (nz <= 0)        nz = 1;
1897     for (i=0; i<mbs; i++) b->imax[i] = nz;
1898     nz = nz*mbs;
1899   } else {
1900     nz = 0;
1901     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1902   }
1903 
1904   /* allocate the matrix space */
1905   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1906   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1907   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1908   b->j  = (int*)(b->a + nz*bs2);
1909   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1910   b->i  = b->j + nz;
1911   b->singlemalloc = PETSC_TRUE;
1912 
1913   b->i[0] = 0;
1914   for (i=1; i<mbs+1; i++) {
1915     b->i[i] = b->i[i-1] + b->imax[i-1];
1916   }
1917 
1918   /* b->ilen will count nonzeros in each block row so far. */
1919   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1920   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1921   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1922 
1923   b->bs               = bs;
1924   b->bs2              = bs2;
1925   b->mbs              = mbs;
1926   b->nz               = 0;
1927   b->maxnz            = nz*bs2;
1928   B->info.nz_unneeded = (PetscReal)b->maxnz;
1929   PetscFunctionReturn(0);
1930 }
1931 
1932