xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 01d980435d1a6c3134b107c9687e3ee899478a5c)
1 /*$Id: baij.c,v 1.239 2001/07/16 18:15:53 buschelm Exp buschelm $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "petscsys.h"                     /*I "petscmat.h" I*/
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 
12 /*  UGLY, ugly, ugly
13    When MatScalar == Scalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
14    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
15    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
16    converts the entries into single precision and then calls ..._MatScalar() to put them
17    into the single precision data structures.
18 */
19 #if defined(PETSC_USE_MAT_SINGLE)
20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
21 #else
22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
23 #endif
24 
25 #define CHUNKSIZE  10
26 
27 /*
28      Checks for missing diagonals
29 */
30 #undef __FUNCT__
31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
32 int MatMissingDiagonal_SeqBAIJ(Mat A)
33 {
34   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
35   int         *diag,*jj = a->j,i,ierr;
36 
37   PetscFunctionBegin;
38   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
39   diag = a->diag;
40   for (i=0; i<a->mbs; i++) {
41     if (jj[diag[i]] != i) {
42       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
43     }
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
50 int MatMarkDiagonal_SeqBAIJ(Mat A)
51 {
52   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
53   int         i,j,*diag,m = a->mbs,ierr;
54 
55   PetscFunctionBegin;
56   if (a->diag) PetscFunctionReturn(0);
57 
58   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
59   PetscLogObjectMemory(A,(m+1)*sizeof(int));
60   for (i=0; i<m; i++) {
61     diag[i] = a->i[i+1];
62     for (j=a->i[i]; j<a->i[i+1]; j++) {
63       if (a->j[j] == i) {
64         diag[i] = j;
65         break;
66       }
67     }
68   }
69   a->diag = diag;
70   PetscFunctionReturn(0);
71 }
72 
73 
74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
79 {
80   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
81   int         ierr,n = a->mbs,i;
82 
83   PetscFunctionBegin;
84   *nn = n;
85   if (!ia) PetscFunctionReturn(0);
86   if (symmetric) {
87     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
88   } else if (oshift == 1) {
89     /* temporarily add 1 to i and j indices */
90     int nz = a->i[n];
91     for (i=0; i<nz; i++) a->j[i]++;
92     for (i=0; i<n+1; i++) a->i[i]++;
93     *ia = a->i; *ja = a->j;
94   } else {
95     *ia = a->i; *ja = a->j;
96   }
97 
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
104 {
105   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
106   int         i,n = a->mbs,ierr;
107 
108   PetscFunctionBegin;
109   if (!ia) PetscFunctionReturn(0);
110   if (symmetric) {
111     ierr = PetscFree(*ia);CHKERRQ(ierr);
112     ierr = PetscFree(*ja);CHKERRQ(ierr);
113   } else if (oshift == 1) {
114     int nz = a->i[n]-1;
115     for (i=0; i<nz; i++) a->j[i]--;
116     for (i=0; i<n+1; i++) a->i[i]--;
117   }
118   PetscFunctionReturn(0);
119 }
120 
121 #undef __FUNCT__
122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
124 {
125   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
126 
127   PetscFunctionBegin;
128   *bs = baij->bs;
129   PetscFunctionReturn(0);
130 }
131 
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatDestroy_SeqBAIJ"
135 int MatDestroy_SeqBAIJ(Mat A)
136 {
137   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
138   int         ierr;
139 
140   PetscFunctionBegin;
141 #if defined(PETSC_USE_LOG)
142   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
143 #endif
144   ierr = PetscFree(a->a);CHKERRQ(ierr);
145   if (!a->singlemalloc) {
146     ierr = PetscFree(a->i);CHKERRQ(ierr);
147     ierr = PetscFree(a->j);CHKERRQ(ierr);
148   }
149   if (a->row) {
150     ierr = ISDestroy(a->row);CHKERRQ(ierr);
151   }
152   if (a->col) {
153     ierr = ISDestroy(a->col);CHKERRQ(ierr);
154   }
155   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
156   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
157   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
158   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
159   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
160   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
161   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
162 #if defined(PETSC_USE_MAT_SINGLE)
163   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
164 #endif
165   ierr = PetscFree(a);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatSetOption_SeqBAIJ"
171 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
172 {
173   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
174 
175   PetscFunctionBegin;
176   switch (op) {
177   case MAT_ROW_ORIENTED:
178     a->roworiented    = PETSC_TRUE;
179     break;
180   case MAT_COLUMN_ORIENTED:
181     a->roworiented    = PETSC_FALSE;
182     break;
183   case MAT_COLUMNS_SORTED:
184     a->sorted         = PETSC_TRUE;
185     break;
186   case MAT_COLUMNS_UNSORTED:
187     a->sorted         = PETSC_FALSE;
188     break;
189   case MAT_KEEP_ZEROED_ROWS:
190     a->keepzeroedrows = PETSC_TRUE;
191     break;
192   case MAT_NO_NEW_NONZERO_LOCATIONS:
193     a->nonew          = 1;
194     break;
195   case MAT_NEW_NONZERO_LOCATION_ERR:
196     a->nonew          = -1;
197     break;
198   case MAT_NEW_NONZERO_ALLOCATION_ERR:
199     a->nonew          = -2;
200     break;
201   case MAT_YES_NEW_NONZERO_LOCATIONS:
202     a->nonew          = 0;
203     break;
204   case MAT_ROWS_SORTED:
205   case MAT_ROWS_UNSORTED:
206   case MAT_YES_NEW_DIAGONALS:
207   case MAT_IGNORE_OFF_PROC_ENTRIES:
208   case MAT_USE_HASH_TABLE:
209     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
210     break;
211   case MAT_NO_NEW_DIAGONALS:
212     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
213     break;
214   case MAT_USE_SINGLE_PRECISION_SOLVES:
215     if (a->bs==4) {
216       PetscTruth sse_enabled=PETSC_FALSE;
217       int        ierr;
218       ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr);
219 #if defined(PETSC_HAVE_SSE)
220       if (sse_enabled) {
221           a->single_precision_solves = PETSC_TRUE;
222           A->ops->solve              = MatSolve_SeqBAIJ_Update;
223           A->ops->solvetranspose     = MatSolveTranspose_SeqBAIJ_Update;
224           break;
225       }
226 #else
227       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
228 #endif
229     }
230     break;
231   default:
232     SETERRQ(PETSC_ERR_SUP,"unknown option");
233   }
234   PetscFunctionReturn(0);
235 }
236 
237 #undef __FUNCT__
238 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ"
239 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
240 {
241   PetscFunctionBegin;
242   if (m) *m = 0;
243   if (n) *n = A->m;
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatGetRow_SeqBAIJ"
249 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
250 {
251   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
252   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
253   MatScalar    *aa,*aa_i;
254   Scalar       *v_i;
255 
256   PetscFunctionBegin;
257   bs  = a->bs;
258   ai  = a->i;
259   aj  = a->j;
260   aa  = a->a;
261   bs2 = a->bs2;
262 
263   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
264 
265   bn  = row/bs;   /* Block number */
266   bp  = row % bs; /* Block Position */
267   M   = ai[bn+1] - ai[bn];
268   *nz = bs*M;
269 
270   if (v) {
271     *v = 0;
272     if (*nz) {
273       ierr = PetscMalloc((*nz)*sizeof(Scalar),v);CHKERRQ(ierr);
274       for (i=0; i<M; i++) { /* for each block in the block row */
275         v_i  = *v + i*bs;
276         aa_i = aa + bs2*(ai[bn] + i);
277         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
278       }
279     }
280   }
281 
282   if (idx) {
283     *idx = 0;
284     if (*nz) {
285       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
286       for (i=0; i<M; i++) { /* for each block in the block row */
287         idx_i = *idx + i*bs;
288         itmp  = bs*aj[ai[bn] + i];
289         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
290       }
291     }
292   }
293   PetscFunctionReturn(0);
294 }
295 
296 #undef __FUNCT__
297 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
298 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
299 {
300   int ierr;
301 
302   PetscFunctionBegin;
303   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
304   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
305   PetscFunctionReturn(0);
306 }
307 
308 #undef __FUNCT__
309 #define __FUNCT__ "MatTranspose_SeqBAIJ"
310 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
311 {
312   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
313   Mat         C;
314   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
315   int         *rows,*cols,bs2=a->bs2;
316   Scalar      *array;
317 
318   PetscFunctionBegin;
319   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
320   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
321   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
322 
323 #if defined(PETSC_USE_MAT_SINGLE)
324   ierr = PetscMalloc(a->bs2*a->nz*sizeof(Scalar),&array);CHKERRQ(ierr);
325   for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i];
326 #else
327   array = a->a;
328 #endif
329 
330   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
331   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
332   ierr = PetscFree(col);CHKERRQ(ierr);
333   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
334   cols = rows + bs;
335   for (i=0; i<mbs; i++) {
336     cols[0] = i*bs;
337     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
338     len = ai[i+1] - ai[i];
339     for (j=0; j<len; j++) {
340       rows[0] = (*aj++)*bs;
341       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
342       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
343       array += bs2;
344     }
345   }
346   ierr = PetscFree(rows);CHKERRQ(ierr);
347 #if defined(PETSC_USE_MAT_SINGLE)
348   ierr = PetscFree(array);
349 #endif
350 
351   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
352   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
353 
354   if (B) {
355     *B = C;
356   } else {
357     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
358   }
359   PetscFunctionReturn(0);
360 }
361 
362 #undef __FUNCT__
363 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
364 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
365 {
366   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
367   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
368   Scalar      *aa;
369   FILE        *file;
370 
371   PetscFunctionBegin;
372   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
373   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
374   col_lens[0] = MAT_COOKIE;
375 
376   col_lens[1] = A->m;
377   col_lens[2] = A->n;
378   col_lens[3] = a->nz*bs2;
379 
380   /* store lengths of each row and write (including header) to file */
381   count = 0;
382   for (i=0; i<a->mbs; i++) {
383     for (j=0; j<bs; j++) {
384       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
385     }
386   }
387   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
388   ierr = PetscFree(col_lens);CHKERRQ(ierr);
389 
390   /* store column indices (zero start index) */
391   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
392   count = 0;
393   for (i=0; i<a->mbs; i++) {
394     for (j=0; j<bs; j++) {
395       for (k=a->i[i]; k<a->i[i+1]; k++) {
396         for (l=0; l<bs; l++) {
397           jj[count++] = bs*a->j[k] + l;
398         }
399       }
400     }
401   }
402   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
403   ierr = PetscFree(jj);CHKERRQ(ierr);
404 
405   /* store nonzero values */
406   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr);
407   count = 0;
408   for (i=0; i<a->mbs; i++) {
409     for (j=0; j<bs; j++) {
410       for (k=a->i[i]; k<a->i[i+1]; k++) {
411         for (l=0; l<bs; l++) {
412           aa[count++] = a->a[bs2*k + l*bs + j];
413         }
414       }
415     }
416   }
417   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
418   ierr = PetscFree(aa);CHKERRQ(ierr);
419 
420   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
421   if (file) {
422     fprintf(file,"-matload_block_size %d\n",a->bs);
423   }
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNCT__
428 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
429 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
430 {
431   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
432   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
433   PetscViewerFormat format;
434 
435   PetscFunctionBegin;
436   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
437   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
438     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
439   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
440     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
441   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
442     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
443     for (i=0; i<a->mbs; i++) {
444       for (j=0; j<bs; j++) {
445         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
446         for (k=a->i[i]; k<a->i[i+1]; k++) {
447           for (l=0; l<bs; l++) {
448 #if defined(PETSC_USE_COMPLEX)
449             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
450               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l,
451                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
452             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
453               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l,
454                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
455             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
456               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
457             }
458 #else
459             if (a->a[bs2*k + l*bs + j] != 0.0) {
460               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
461             }
462 #endif
463           }
464         }
465         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
466       }
467     }
468     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
469   } else {
470     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
471     for (i=0; i<a->mbs; i++) {
472       for (j=0; j<bs; j++) {
473         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
474         for (k=a->i[i]; k<a->i[i+1]; k++) {
475           for (l=0; l<bs; l++) {
476 #if defined(PETSC_USE_COMPLEX)
477             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
478               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
479                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
480             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
481               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
482                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
483             } else {
484               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
485             }
486 #else
487             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
488 #endif
489           }
490         }
491         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
492       }
493     }
494     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
495   }
496   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
502 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
503 {
504   Mat          A = (Mat) Aa;
505   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
506   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
507   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
508   MatScalar    *aa;
509   PetscViewer  viewer;
510 
511   PetscFunctionBegin;
512 
513   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
514   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
515 
516   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
517 
518   /* loop over matrix elements drawing boxes */
519   color = PETSC_DRAW_BLUE;
520   for (i=0,row=0; i<mbs; i++,row+=bs) {
521     for (j=a->i[i]; j<a->i[i+1]; j++) {
522       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
523       x_l = a->j[j]*bs; x_r = x_l + 1.0;
524       aa = a->a + j*bs2;
525       for (k=0; k<bs; k++) {
526         for (l=0; l<bs; l++) {
527           if (PetscRealPart(*aa++) >=  0.) continue;
528           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
529         }
530       }
531     }
532   }
533   color = PETSC_DRAW_CYAN;
534   for (i=0,row=0; i<mbs; i++,row+=bs) {
535     for (j=a->i[i]; j<a->i[i+1]; j++) {
536       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
537       x_l = a->j[j]*bs; x_r = x_l + 1.0;
538       aa = a->a + j*bs2;
539       for (k=0; k<bs; k++) {
540         for (l=0; l<bs; l++) {
541           if (PetscRealPart(*aa++) != 0.) continue;
542           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
543         }
544       }
545     }
546   }
547 
548   color = PETSC_DRAW_RED;
549   for (i=0,row=0; i<mbs; i++,row+=bs) {
550     for (j=a->i[i]; j<a->i[i+1]; j++) {
551       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
552       x_l = a->j[j]*bs; x_r = x_l + 1.0;
553       aa = a->a + j*bs2;
554       for (k=0; k<bs; k++) {
555         for (l=0; l<bs; l++) {
556           if (PetscRealPart(*aa++) <= 0.) continue;
557           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
558         }
559       }
560     }
561   }
562   PetscFunctionReturn(0);
563 }
564 
565 #undef __FUNCT__
566 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
567 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
568 {
569   int          ierr;
570   PetscReal    xl,yl,xr,yr,w,h;
571   PetscDraw    draw;
572   PetscTruth   isnull;
573 
574   PetscFunctionBegin;
575 
576   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
577   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
578 
579   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
580   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
581   xr += w;    yr += h;  xl = -w;     yl = -h;
582   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
583   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
584   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
585   PetscFunctionReturn(0);
586 }
587 
588 #undef __FUNCT__
589 #define __FUNCT__ "MatView_SeqBAIJ"
590 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
591 {
592   int        ierr;
593   PetscTruth issocket,isascii,isbinary,isdraw;
594 
595   PetscFunctionBegin;
596   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
597   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
598   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
599   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
600   if (issocket) {
601     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
602   } else if (isascii){
603     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
604   } else if (isbinary) {
605     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
606   } else if (isdraw) {
607     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
608   } else {
609     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
610   }
611   PetscFunctionReturn(0);
612 }
613 
614 
615 #undef __FUNCT__
616 #define __FUNCT__ "MatGetValues_SeqBAIJ"
617 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
618 {
619   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
620   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
621   int        *ai = a->i,*ailen = a->ilen;
622   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
623   MatScalar  *ap,*aa = a->a,zero = 0.0;
624 
625   PetscFunctionBegin;
626   for (k=0; k<m; k++) { /* loop over rows */
627     row  = im[k]; brow = row/bs;
628     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
629     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
630     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
631     nrow = ailen[brow];
632     for (l=0; l<n; l++) { /* loop over columns */
633       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
634       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
635       col  = in[l] ;
636       bcol = col/bs;
637       cidx = col%bs;
638       ridx = row%bs;
639       high = nrow;
640       low  = 0; /* assume unsorted */
641       while (high-low > 5) {
642         t = (low+high)/2;
643         if (rp[t] > bcol) high = t;
644         else             low  = t;
645       }
646       for (i=low; i<high; i++) {
647         if (rp[i] > bcol) break;
648         if (rp[i] == bcol) {
649           *v++ = ap[bs2*i+bs*cidx+ridx];
650           goto finished;
651         }
652       }
653       *v++ = zero;
654       finished:;
655     }
656   }
657   PetscFunctionReturn(0);
658 }
659 
660 #if defined(PETSC_USE_MAT_SINGLE)
661 #undef __FUNCT__
662 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
663 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
664 {
665   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
666   int         ierr,i,N = m*n*b->bs2;
667   MatScalar   *vsingle;
668 
669   PetscFunctionBegin;
670   if (N > b->setvalueslen) {
671     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
672     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
673     b->setvalueslen  = N;
674   }
675   vsingle = b->setvaluescopy;
676   for (i=0; i<N; i++) {
677     vsingle[i] = v[i];
678   }
679   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
680   PetscFunctionReturn(0);
681 }
682 #endif
683 
684 
685 #undef __FUNCT__
686 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
687 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
688 {
689   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
690   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
691   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
692   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
693   PetscTruth  roworiented=a->roworiented;
694   MatScalar   *value = v,*ap,*aa = a->a,*bap;
695 
696   PetscFunctionBegin;
697   if (roworiented) {
698     stepval = (n-1)*bs;
699   } else {
700     stepval = (m-1)*bs;
701   }
702   for (k=0; k<m; k++) { /* loop over added rows */
703     row  = im[k];
704     if (row < 0) continue;
705 #if defined(PETSC_USE_BOPT_g)
706     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
707 #endif
708     rp   = aj + ai[row];
709     ap   = aa + bs2*ai[row];
710     rmax = imax[row];
711     nrow = ailen[row];
712     low  = 0;
713     for (l=0; l<n; l++) { /* loop over added columns */
714       if (in[l] < 0) continue;
715 #if defined(PETSC_USE_BOPT_g)
716       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
717 #endif
718       col = in[l];
719       if (roworiented) {
720         value = v + k*(stepval+bs)*bs + l*bs;
721       } else {
722         value = v + l*(stepval+bs)*bs + k*bs;
723       }
724       if (!sorted) low = 0; high = nrow;
725       while (high-low > 7) {
726         t = (low+high)/2;
727         if (rp[t] > col) high = t;
728         else             low  = t;
729       }
730       for (i=low; i<high; i++) {
731         if (rp[i] > col) break;
732         if (rp[i] == col) {
733           bap  = ap +  bs2*i;
734           if (roworiented) {
735             if (is == ADD_VALUES) {
736               for (ii=0; ii<bs; ii++,value+=stepval) {
737                 for (jj=ii; jj<bs2; jj+=bs) {
738                   bap[jj] += *value++;
739                 }
740               }
741             } else {
742               for (ii=0; ii<bs; ii++,value+=stepval) {
743                 for (jj=ii; jj<bs2; jj+=bs) {
744                   bap[jj] = *value++;
745                 }
746               }
747             }
748           } else {
749             if (is == ADD_VALUES) {
750               for (ii=0; ii<bs; ii++,value+=stepval) {
751                 for (jj=0; jj<bs; jj++) {
752                   *bap++ += *value++;
753                 }
754               }
755             } else {
756               for (ii=0; ii<bs; ii++,value+=stepval) {
757                 for (jj=0; jj<bs; jj++) {
758                   *bap++  = *value++;
759                 }
760               }
761             }
762           }
763           goto noinsert2;
764         }
765       }
766       if (nonew == 1) goto noinsert2;
767       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
768       if (nrow >= rmax) {
769         /* there is no extra room in row, therefore enlarge */
770         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
771         MatScalar *new_a;
772 
773         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
774 
775         /* malloc new storage space */
776         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
777 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
778         new_j   = (int*)(new_a + bs2*new_nz);
779         new_i   = new_j + new_nz;
780 
781         /* copy over old data into new slots */
782         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
783         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
784         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
785         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
786         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
787         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
788         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
789         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
790         /* free up old matrix storage */
791         ierr = PetscFree(a->a);CHKERRQ(ierr);
792         if (!a->singlemalloc) {
793           ierr = PetscFree(a->i);CHKERRQ(ierr);
794           ierr = PetscFree(a->j);CHKERRQ(ierr);
795         }
796         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
797         a->singlemalloc = PETSC_TRUE;
798 
799         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
800         rmax = imax[row] = imax[row] + CHUNKSIZE;
801         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
802         a->maxnz += bs2*CHUNKSIZE;
803         a->reallocs++;
804         a->nz++;
805       }
806       N = nrow++ - 1;
807       /* shift up all the later entries in this row */
808       for (ii=N; ii>=i; ii--) {
809         rp[ii+1] = rp[ii];
810         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
811       }
812       if (N >= i) {
813         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
814       }
815       rp[i] = col;
816       bap   = ap +  bs2*i;
817       if (roworiented) {
818         for (ii=0; ii<bs; ii++,value+=stepval) {
819           for (jj=ii; jj<bs2; jj+=bs) {
820             bap[jj] = *value++;
821           }
822         }
823       } else {
824         for (ii=0; ii<bs; ii++,value+=stepval) {
825           for (jj=0; jj<bs; jj++) {
826             *bap++  = *value++;
827           }
828         }
829       }
830       noinsert2:;
831       low = i;
832     }
833     ailen[row] = nrow;
834   }
835   PetscFunctionReturn(0);
836 }
837 
838 
839 #undef __FUNCT__
840 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
841 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
842 {
843   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
844   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
845   int        m = A->m,*ip,N,*ailen = a->ilen;
846   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
847   MatScalar  *aa = a->a,*ap;
848 
849   PetscFunctionBegin;
850   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
851 
852   if (m) rmax = ailen[0];
853   for (i=1; i<mbs; i++) {
854     /* move each row back by the amount of empty slots (fshift) before it*/
855     fshift += imax[i-1] - ailen[i-1];
856     rmax   = PetscMax(rmax,ailen[i]);
857     if (fshift) {
858       ip = aj + ai[i]; ap = aa + bs2*ai[i];
859       N = ailen[i];
860       for (j=0; j<N; j++) {
861         ip[j-fshift] = ip[j];
862         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
863       }
864     }
865     ai[i] = ai[i-1] + ailen[i-1];
866   }
867   if (mbs) {
868     fshift += imax[mbs-1] - ailen[mbs-1];
869     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
870   }
871   /* reset ilen and imax for each row */
872   for (i=0; i<mbs; i++) {
873     ailen[i] = imax[i] = ai[i+1] - ai[i];
874   }
875   a->nz = ai[mbs];
876 
877   /* diagonals may have moved, so kill the diagonal pointers */
878   if (fshift && a->diag) {
879     ierr = PetscFree(a->diag);CHKERRQ(ierr);
880     PetscLogObjectMemory(A,-(m+1)*sizeof(int));
881     a->diag = 0;
882   }
883   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
884            m,A->n,a->bs,fshift*bs2,a->nz*bs2);
885   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
886            a->reallocs);
887   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
888   a->reallocs          = 0;
889   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
890 
891   PetscFunctionReturn(0);
892 }
893 
894 
895 
896 /*
897    This function returns an array of flags which indicate the locations of contiguous
898    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
899    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
900    Assume: sizes should be long enough to hold all the values.
901 */
902 #undef __FUNCT__
903 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
904 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
905 {
906   int        i,j,k,row;
907   PetscTruth flg;
908 
909   PetscFunctionBegin;
910   for (i=0,j=0; i<n; j++) {
911     row = idx[i];
912     if (row%bs!=0) { /* Not the begining of a block */
913       sizes[j] = 1;
914       i++;
915     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
916       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
917       i++;
918     } else { /* Begining of the block, so check if the complete block exists */
919       flg = PETSC_TRUE;
920       for (k=1; k<bs; k++) {
921         if (row+k != idx[i+k]) { /* break in the block */
922           flg = PETSC_FALSE;
923           break;
924         }
925       }
926       if (flg == PETSC_TRUE) { /* No break in the bs */
927         sizes[j] = bs;
928         i+= bs;
929       } else {
930         sizes[j] = 1;
931         i++;
932       }
933     }
934   }
935   *bs_max = j;
936   PetscFunctionReturn(0);
937 }
938 
939 #undef __FUNCT__
940 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
941 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag)
942 {
943   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
944   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
945   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
946   Scalar      zero = 0.0;
947   MatScalar   *aa;
948 
949   PetscFunctionBegin;
950   /* Make a copy of the IS and  sort it */
951   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
952   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
953 
954   /* allocate memory for rows,sizes */
955   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
956   sizes = rows + is_n;
957 
958   /* copy IS values to rows, and sort them */
959   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
960   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
961   if (baij->keepzeroedrows) {
962     for (i=0; i<is_n; i++) { sizes[i] = 1; }
963     bs_max = is_n;
964   } else {
965     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
966   }
967   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
968 
969   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
970     row   = rows[j];
971     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
972     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
973     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
974     if (sizes[i] == bs && !baij->keepzeroedrows) {
975       if (diag) {
976         if (baij->ilen[row/bs] > 0) {
977           baij->ilen[row/bs]       = 1;
978           baij->j[baij->i[row/bs]] = row/bs;
979           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
980         }
981         /* Now insert all the diagonal values for this bs */
982         for (k=0; k<bs; k++) {
983           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
984         }
985       } else { /* (!diag) */
986         baij->ilen[row/bs] = 0;
987       } /* end (!diag) */
988     } else { /* (sizes[i] != bs) */
989 #if defined (PETSC_USE_DEBUG)
990       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
991 #endif
992       for (k=0; k<count; k++) {
993         aa[0] =  zero;
994         aa    += bs;
995       }
996       if (diag) {
997         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
998       }
999     }
1000   }
1001 
1002   ierr = PetscFree(rows);CHKERRQ(ierr);
1003   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1009 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
1010 {
1011   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1012   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1013   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1014   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1015   int         ridx,cidx,bs2=a->bs2,ierr;
1016   PetscTruth  roworiented=a->roworiented;
1017   MatScalar   *ap,value,*aa=a->a,*bap;
1018 
1019   PetscFunctionBegin;
1020   for (k=0; k<m; k++) { /* loop over added rows */
1021     row  = im[k]; brow = row/bs;
1022     if (row < 0) continue;
1023 #if defined(PETSC_USE_BOPT_g)
1024     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1025 #endif
1026     rp   = aj + ai[brow];
1027     ap   = aa + bs2*ai[brow];
1028     rmax = imax[brow];
1029     nrow = ailen[brow];
1030     low  = 0;
1031     for (l=0; l<n; l++) { /* loop over added columns */
1032       if (in[l] < 0) continue;
1033 #if defined(PETSC_USE_BOPT_g)
1034       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1035 #endif
1036       col = in[l]; bcol = col/bs;
1037       ridx = row % bs; cidx = col % bs;
1038       if (roworiented) {
1039         value = v[l + k*n];
1040       } else {
1041         value = v[k + l*m];
1042       }
1043       if (!sorted) low = 0; high = nrow;
1044       while (high-low > 7) {
1045         t = (low+high)/2;
1046         if (rp[t] > bcol) high = t;
1047         else              low  = t;
1048       }
1049       for (i=low; i<high; i++) {
1050         if (rp[i] > bcol) break;
1051         if (rp[i] == bcol) {
1052           bap  = ap +  bs2*i + bs*cidx + ridx;
1053           if (is == ADD_VALUES) *bap += value;
1054           else                  *bap  = value;
1055           goto noinsert1;
1056         }
1057       }
1058       if (nonew == 1) goto noinsert1;
1059       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1060       if (nrow >= rmax) {
1061         /* there is no extra room in row, therefore enlarge */
1062         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1063         MatScalar *new_a;
1064 
1065         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1066 
1067         /* Malloc new storage space */
1068         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1069 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1070         new_j   = (int*)(new_a + bs2*new_nz);
1071         new_i   = new_j + new_nz;
1072 
1073         /* copy over old data into new slots */
1074         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1075         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1076         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1077         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1078         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1079         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1080         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1081         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1082         /* free up old matrix storage */
1083         ierr = PetscFree(a->a);CHKERRQ(ierr);
1084         if (!a->singlemalloc) {
1085           ierr = PetscFree(a->i);CHKERRQ(ierr);
1086           ierr = PetscFree(a->j);CHKERRQ(ierr);
1087         }
1088         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1089         a->singlemalloc = PETSC_TRUE;
1090 
1091         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1092         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1093         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1094         a->maxnz += bs2*CHUNKSIZE;
1095         a->reallocs++;
1096         a->nz++;
1097       }
1098       N = nrow++ - 1;
1099       /* shift up all the later entries in this row */
1100       for (ii=N; ii>=i; ii--) {
1101         rp[ii+1] = rp[ii];
1102         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1103       }
1104       if (N>=i) {
1105         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1106       }
1107       rp[i]                      = bcol;
1108       ap[bs2*i + bs*cidx + ridx] = value;
1109       noinsert1:;
1110       low = i;
1111     }
1112     ailen[brow] = nrow;
1113   }
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1120 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1121 {
1122   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1123   Mat         outA;
1124   int         ierr;
1125   PetscTruth  row_identity,col_identity;
1126 
1127   PetscFunctionBegin;
1128   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1129   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1130   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1131   if (!row_identity || !col_identity) {
1132     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1133   }
1134 
1135   outA          = inA;
1136   inA->factor   = FACTOR_LU;
1137 
1138   if (!a->diag) {
1139     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1140   }
1141   /*
1142       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1143       for ILU(0) factorization with natural ordering
1144   */
1145   if (a->bs < 8) {
1146     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1147   } else {
1148     a->row        = row;
1149     a->col        = col;
1150     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1151     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1152 
1153     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1154     ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1155     PetscLogObjectParent(inA,a->icol);
1156 
1157     if (!a->solve_work) {
1158       ierr = PetscMalloc((inA->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr);
1159       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(Scalar));
1160     }
1161   }
1162 
1163   if (row_identity && col_identity) {
1164     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1165   }
1166   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1167 
1168   PetscFunctionReturn(0);
1169 }
1170 #undef __FUNCT__
1171 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1172 int MatPrintHelp_SeqBAIJ(Mat A)
1173 {
1174   static PetscTruth called = PETSC_FALSE;
1175   MPI_Comm          comm = A->comm;
1176   int               ierr;
1177 
1178   PetscFunctionBegin;
1179   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1180   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1181   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 EXTERN_C_BEGIN
1186 #undef __FUNCT__
1187 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1188 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1189 {
1190   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1191   int         i,nz,n;
1192 
1193   PetscFunctionBegin;
1194   nz = baij->maxnz;
1195   n  = mat->n;
1196   for (i=0; i<nz; i++) {
1197     baij->j[i] = indices[i];
1198   }
1199   baij->nz = nz;
1200   for (i=0; i<n; i++) {
1201     baij->ilen[i] = baij->imax[i];
1202   }
1203 
1204   PetscFunctionReturn(0);
1205 }
1206 EXTERN_C_END
1207 
1208 #undef __FUNCT__
1209 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1210 /*@
1211     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1212        in the matrix.
1213 
1214   Input Parameters:
1215 +  mat - the SeqBAIJ matrix
1216 -  indices - the column indices
1217 
1218   Level: advanced
1219 
1220   Notes:
1221     This can be called if you have precomputed the nonzero structure of the
1222   matrix and want to provide it to the matrix object to improve the performance
1223   of the MatSetValues() operation.
1224 
1225     You MUST have set the correct numbers of nonzeros per row in the call to
1226   MatCreateSeqBAIJ().
1227 
1228     MUST be called before any calls to MatSetValues();
1229 
1230 @*/
1231 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1232 {
1233   int ierr,(*f)(Mat,int *);
1234 
1235   PetscFunctionBegin;
1236   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1237   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr);
1238   if (f) {
1239     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1240   } else {
1241     SETERRQ(1,"Wrong type of matrix to set column indices");
1242   }
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNCT__
1247 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1248 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1249 {
1250   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1251   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1252   PetscReal    atmp;
1253   Scalar       *x,zero = 0.0;
1254   MatScalar    *aa;
1255   int          ncols,brow,krow,kcol;
1256 
1257   PetscFunctionBegin;
1258   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1259   bs   = a->bs;
1260   aa   = a->a;
1261   ai   = a->i;
1262   aj   = a->j;
1263   mbs = a->mbs;
1264 
1265   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1266   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1267   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1268   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1269   for (i=0; i<mbs; i++) {
1270     ncols = ai[1] - ai[0]; ai++;
1271     brow  = bs*i;
1272     for (j=0; j<ncols; j++){
1273       /* bcol = bs*(*aj); */
1274       for (kcol=0; kcol<bs; kcol++){
1275         for (krow=0; krow<bs; krow++){
1276           atmp = PetscAbsScalar(*aa); aa++;
1277           row = brow + krow;    /* row index */
1278           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1279           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1280         }
1281       }
1282       aj++;
1283     }
1284   }
1285   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 #undef __FUNCT__
1290 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1291 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1292 {
1293   int        ierr;
1294 
1295   PetscFunctionBegin;
1296   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1297   PetscFunctionReturn(0);
1298 }
1299 
1300 #undef __FUNCT__
1301 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1302 int MatGetArray_SeqBAIJ(Mat A,Scalar **array)
1303 {
1304   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1305   PetscFunctionBegin;
1306   *array = a->a;
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1312 int MatRestoreArray_SeqBAIJ(Mat A,Scalar **array)
1313 {
1314   PetscFunctionBegin;
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 /* -------------------------------------------------------------------*/
1319 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1320        MatGetRow_SeqBAIJ,
1321        MatRestoreRow_SeqBAIJ,
1322        MatMult_SeqBAIJ_N,
1323        MatMultAdd_SeqBAIJ_N,
1324        MatMultTranspose_SeqBAIJ,
1325        MatMultTransposeAdd_SeqBAIJ,
1326        MatSolve_SeqBAIJ_N,
1327        0,
1328        0,
1329        0,
1330        MatLUFactor_SeqBAIJ,
1331        0,
1332        0,
1333        MatTranspose_SeqBAIJ,
1334        MatGetInfo_SeqBAIJ,
1335        MatEqual_SeqBAIJ,
1336        MatGetDiagonal_SeqBAIJ,
1337        MatDiagonalScale_SeqBAIJ,
1338        MatNorm_SeqBAIJ,
1339        0,
1340        MatAssemblyEnd_SeqBAIJ,
1341        0,
1342        MatSetOption_SeqBAIJ,
1343        MatZeroEntries_SeqBAIJ,
1344        MatZeroRows_SeqBAIJ,
1345        MatLUFactorSymbolic_SeqBAIJ,
1346        MatLUFactorNumeric_SeqBAIJ_N,
1347        0,
1348        0,
1349        MatSetUpPreallocation_SeqBAIJ,
1350        0,
1351        MatGetOwnershipRange_SeqBAIJ,
1352        MatILUFactorSymbolic_SeqBAIJ,
1353        0,
1354        MatGetArray_SeqBAIJ,
1355        MatRestoreArray_SeqBAIJ,
1356        MatDuplicate_SeqBAIJ,
1357        0,
1358        0,
1359        MatILUFactor_SeqBAIJ,
1360        0,
1361        0,
1362        MatGetSubMatrices_SeqBAIJ,
1363        MatIncreaseOverlap_SeqBAIJ,
1364        MatGetValues_SeqBAIJ,
1365        0,
1366        MatPrintHelp_SeqBAIJ,
1367        MatScale_SeqBAIJ,
1368        0,
1369        0,
1370        0,
1371        MatGetBlockSize_SeqBAIJ,
1372        MatGetRowIJ_SeqBAIJ,
1373        MatRestoreRowIJ_SeqBAIJ,
1374        0,
1375        0,
1376        0,
1377        0,
1378        0,
1379        0,
1380        MatSetValuesBlocked_SeqBAIJ,
1381        MatGetSubMatrix_SeqBAIJ,
1382        MatDestroy_SeqBAIJ,
1383        MatView_SeqBAIJ,
1384        MatGetMaps_Petsc,
1385        0,
1386        0,
1387        0,
1388        0,
1389        0,
1390        0,
1391        MatGetRowMax_SeqBAIJ,
1392        MatConvert_Basic};
1393 
1394 EXTERN_C_BEGIN
1395 #undef __FUNCT__
1396 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1397 int MatStoreValues_SeqBAIJ(Mat mat)
1398 {
1399   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1400   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1401   int         ierr;
1402 
1403   PetscFunctionBegin;
1404   if (aij->nonew != 1) {
1405     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1406   }
1407 
1408   /* allocate space for values if not already there */
1409   if (!aij->saved_values) {
1410     ierr = PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr);
1411   }
1412 
1413   /* copy values over */
1414   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
1417 EXTERN_C_END
1418 
1419 EXTERN_C_BEGIN
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1422 int MatRetrieveValues_SeqBAIJ(Mat mat)
1423 {
1424   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1425   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1426 
1427   PetscFunctionBegin;
1428   if (aij->nonew != 1) {
1429     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1430   }
1431   if (!aij->saved_values) {
1432     SETERRQ(1,"Must call MatStoreValues(A);first");
1433   }
1434 
1435   /* copy values over */
1436   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 EXTERN_C_END
1440 
1441 EXTERN_C_BEGIN
1442 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1443 EXTERN_C_END
1444 
1445 EXTERN_C_BEGIN
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatCreate_SeqBAIJ"
1448 int MatCreate_SeqBAIJ(Mat B)
1449 {
1450   int         ierr,size;
1451   Mat_SeqBAIJ *b;
1452 
1453   PetscFunctionBegin;
1454   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1455   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1456 
1457   B->m = B->M = PetscMax(B->m,B->M);
1458   B->n = B->N = PetscMax(B->n,B->N);
1459   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1460   B->data = (void*)b;
1461   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1462   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1463   B->factor           = 0;
1464   B->lupivotthreshold = 1.0;
1465   B->mapping          = 0;
1466   b->row              = 0;
1467   b->col              = 0;
1468   b->icol             = 0;
1469   b->reallocs         = 0;
1470   b->saved_values     = 0;
1471 #if defined(PETSC_USE_MAT_SINGLE)
1472   b->setvalueslen     = 0;
1473   b->setvaluescopy    = PETSC_NULL;
1474 #endif
1475   b->single_precision_solves = PETSC_FALSE;
1476 
1477   ierr = MapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1478   ierr = MapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1479 
1480   b->sorted           = PETSC_FALSE;
1481   b->roworiented      = PETSC_TRUE;
1482   b->nonew            = 0;
1483   b->diag             = 0;
1484   b->solve_work       = 0;
1485   b->mult_work        = 0;
1486   b->spptr            = 0;
1487   B->info.nz_unneeded = (PetscReal)b->maxnz;
1488   b->keepzeroedrows   = PETSC_FALSE;
1489 
1490   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1491                                      "MatStoreValues_SeqBAIJ",
1492                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1493   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1494                                      "MatRetrieveValues_SeqBAIJ",
1495                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1496   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1497                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1498                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1499   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1500                                      "MatConvert_SeqBAIJ_SeqAIJ",
1501                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1502   PetscFunctionReturn(0);
1503 }
1504 EXTERN_C_END
1505 
1506 #undef __FUNCT__
1507 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1508 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1509 {
1510   Mat         C;
1511   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1512   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1513 
1514   PetscFunctionBegin;
1515   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1516 
1517   *B = 0;
1518   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1519   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1520   c    = (Mat_SeqBAIJ*)C->data;
1521 
1522   c->bs         = a->bs;
1523   c->bs2        = a->bs2;
1524   c->mbs        = a->mbs;
1525   c->nbs        = a->nbs;
1526   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1527 
1528   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1529   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1530   for (i=0; i<mbs; i++) {
1531     c->imax[i] = a->imax[i];
1532     c->ilen[i] = a->ilen[i];
1533   }
1534 
1535   /* allocate the matrix space */
1536   c->singlemalloc = PETSC_TRUE;
1537   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1538   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1539   c->j = (int*)(c->a + nz*bs2);
1540   c->i = c->j + nz;
1541   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1542   if (mbs > 0) {
1543     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1544     if (cpvalues == MAT_COPY_VALUES) {
1545       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1546     } else {
1547       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1548     }
1549   }
1550 
1551   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1552   c->sorted      = a->sorted;
1553   c->roworiented = a->roworiented;
1554   c->nonew       = a->nonew;
1555 
1556   if (a->diag) {
1557     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1558     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1559     for (i=0; i<mbs; i++) {
1560       c->diag[i] = a->diag[i];
1561     }
1562   } else c->diag        = 0;
1563   c->nz                 = a->nz;
1564   c->maxnz              = a->maxnz;
1565   c->solve_work         = 0;
1566   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1567   c->mult_work          = 0;
1568   C->preallocated       = PETSC_TRUE;
1569   C->assembled          = PETSC_TRUE;
1570   *B = C;
1571   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1572   PetscFunctionReturn(0);
1573 }
1574 
1575 EXTERN_C_BEGIN
1576 #undef __FUNCT__
1577 #define __FUNCT__ "MatLoad_SeqBAIJ"
1578 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1579 {
1580   Mat_SeqBAIJ  *a;
1581   Mat          B;
1582   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1583   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1584   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1585   int          *masked,nmask,tmp,bs2,ishift;
1586   Scalar       *aa;
1587   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1588 
1589   PetscFunctionBegin;
1590   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1591   bs2  = bs*bs;
1592 
1593   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1594   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1595   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1596   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1597   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1598   M = header[1]; N = header[2]; nz = header[3];
1599 
1600   if (header[3] < 0) {
1601     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1602   }
1603 
1604   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1605 
1606   /*
1607      This code adds extra rows to make sure the number of rows is
1608     divisible by the blocksize
1609   */
1610   mbs        = M/bs;
1611   extra_rows = bs - M + bs*(mbs);
1612   if (extra_rows == bs) extra_rows = 0;
1613   else                  mbs++;
1614   if (extra_rows) {
1615     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1616   }
1617 
1618   /* read in row lengths */
1619   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1620   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1621   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1622 
1623   /* read in column indices */
1624   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1625   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1626   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1627 
1628   /* loop over row lengths determining block row lengths */
1629   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1630   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1631   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1632   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1633   masked   = mask + mbs;
1634   rowcount = 0; nzcount = 0;
1635   for (i=0; i<mbs; i++) {
1636     nmask = 0;
1637     for (j=0; j<bs; j++) {
1638       kmax = rowlengths[rowcount];
1639       for (k=0; k<kmax; k++) {
1640         tmp = jj[nzcount++]/bs;
1641         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1642       }
1643       rowcount++;
1644     }
1645     browlengths[i] += nmask;
1646     /* zero out the mask elements we set */
1647     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1648   }
1649 
1650   /* create our matrix */
1651   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1652   B = *A;
1653   a = (Mat_SeqBAIJ*)B->data;
1654 
1655   /* set matrix "i" values */
1656   a->i[0] = 0;
1657   for (i=1; i<= mbs; i++) {
1658     a->i[i]      = a->i[i-1] + browlengths[i-1];
1659     a->ilen[i-1] = browlengths[i-1];
1660   }
1661   a->nz         = 0;
1662   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1663 
1664   /* read in nonzero values */
1665   ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr);
1666   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1667   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1668 
1669   /* set "a" and "j" values into matrix */
1670   nzcount = 0; jcount = 0;
1671   for (i=0; i<mbs; i++) {
1672     nzcountb = nzcount;
1673     nmask    = 0;
1674     for (j=0; j<bs; j++) {
1675       kmax = rowlengths[i*bs+j];
1676       for (k=0; k<kmax; k++) {
1677         tmp = jj[nzcount++]/bs;
1678 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1679       }
1680     }
1681     /* sort the masked values */
1682     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1683 
1684     /* set "j" values into matrix */
1685     maskcount = 1;
1686     for (j=0; j<nmask; j++) {
1687       a->j[jcount++]  = masked[j];
1688       mask[masked[j]] = maskcount++;
1689     }
1690     /* set "a" values into matrix */
1691     ishift = bs2*a->i[i];
1692     for (j=0; j<bs; j++) {
1693       kmax = rowlengths[i*bs+j];
1694       for (k=0; k<kmax; k++) {
1695         tmp       = jj[nzcountb]/bs ;
1696         block     = mask[tmp] - 1;
1697         point     = jj[nzcountb] - bs*tmp;
1698         idx       = ishift + bs2*block + j + bs*point;
1699         a->a[idx] = (MatScalar)aa[nzcountb++];
1700       }
1701     }
1702     /* zero out the mask elements we set */
1703     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1704   }
1705   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1706 
1707   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1708   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1709   ierr = PetscFree(aa);CHKERRQ(ierr);
1710   ierr = PetscFree(jj);CHKERRQ(ierr);
1711   ierr = PetscFree(mask);CHKERRQ(ierr);
1712 
1713   B->assembled = PETSC_TRUE;
1714 
1715   ierr = MatView_Private(B);CHKERRQ(ierr);
1716   PetscFunctionReturn(0);
1717 }
1718 EXTERN_C_END
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatCreateSeqBAIJ"
1722 /*@C
1723    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1724    compressed row) format.  For good matrix assembly performance the
1725    user should preallocate the matrix storage by setting the parameter nz
1726    (or the array nnz).  By setting these parameters accurately, performance
1727    during matrix assembly can be increased by more than a factor of 50.
1728 
1729    Collective on MPI_Comm
1730 
1731    Input Parameters:
1732 +  comm - MPI communicator, set to PETSC_COMM_SELF
1733 .  bs - size of block
1734 .  m - number of rows
1735 .  n - number of columns
1736 .  nz - number of nonzero blocks  per block row (same for all rows)
1737 -  nnz - array containing the number of nonzero blocks in the various block rows
1738          (possibly different for each block row) or PETSC_NULL
1739 
1740    Output Parameter:
1741 .  A - the matrix
1742 
1743    Options Database Keys:
1744 .   -mat_no_unroll - uses code that does not unroll the loops in the
1745                      block calculations (much slower)
1746 .    -mat_block_size - size of the blocks to use
1747 
1748    Level: intermediate
1749 
1750    Notes:
1751    A nonzero block is any block that as 1 or more nonzeros in it
1752 
1753    The block AIJ format is fully compatible with standard Fortran 77
1754    storage.  That is, the stored row and column indices can begin at
1755    either one (as in Fortran) or zero.  See the users' manual for details.
1756 
1757    Specify the preallocated storage with either nz or nnz (not both).
1758    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1759    allocation.  For additional details, see the users manual chapter on
1760    matrices.
1761 
1762 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1763 @*/
1764 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1765 {
1766   int ierr;
1767 
1768   PetscFunctionBegin;
1769   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1770   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
1771   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1772   PetscFunctionReturn(0);
1773 }
1774 
1775 #undef __FUNCT__
1776 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
1777 /*@C
1778    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1779    per row in the matrix. For good matrix assembly performance the
1780    user should preallocate the matrix storage by setting the parameter nz
1781    (or the array nnz).  By setting these parameters accurately, performance
1782    during matrix assembly can be increased by more than a factor of 50.
1783 
1784    Collective on MPI_Comm
1785 
1786    Input Parameters:
1787 +  A - the matrix
1788 .  bs - size of block
1789 .  nz - number of block nonzeros per block row (same for all rows)
1790 -  nnz - array containing the number of block nonzeros in the various block rows
1791          (possibly different for each block row) or PETSC_NULL
1792 
1793    Options Database Keys:
1794 .   -mat_no_unroll - uses code that does not unroll the loops in the
1795                      block calculations (much slower)
1796 .    -mat_block_size - size of the blocks to use
1797 
1798    Level: intermediate
1799 
1800    Notes:
1801    The block AIJ format is fully compatible with standard Fortran 77
1802    storage.  That is, the stored row and column indices can begin at
1803    either one (as in Fortran) or zero.  See the users' manual for details.
1804 
1805    Specify the preallocated storage with either nz or nnz (not both).
1806    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1807    allocation.  For additional details, see the users manual chapter on
1808    matrices.
1809 
1810 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1811 @*/
1812 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1813 {
1814   Mat_SeqBAIJ *b;
1815   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1816   PetscTruth  flg;
1817 
1818   PetscFunctionBegin;
1819   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr);
1820   if (!flg) PetscFunctionReturn(0);
1821 
1822   B->preallocated = PETSC_TRUE;
1823   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1824   if (nnz && newbs != bs) {
1825     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1826   }
1827   bs   = newbs;
1828 
1829   mbs  = B->m/bs;
1830   nbs  = B->n/bs;
1831   bs2  = bs*bs;
1832 
1833   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1834     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1835   }
1836 
1837   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1838   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1839   if (nnz) {
1840     for (i=0; i<mbs; i++) {
1841       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1842       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1843     }
1844   }
1845 
1846   b       = (Mat_SeqBAIJ*)B->data;
1847   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1848   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1849   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1850   if (!flg) {
1851     switch (bs) {
1852     case 1:
1853       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1854       B->ops->mult            = MatMult_SeqBAIJ_1;
1855       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1856       break;
1857     case 2:
1858       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1859       B->ops->mult            = MatMult_SeqBAIJ_2;
1860       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1861       break;
1862     case 3:
1863       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1864       B->ops->mult            = MatMult_SeqBAIJ_3;
1865       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1866       break;
1867     case 4:
1868       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1869       B->ops->mult            = MatMult_SeqBAIJ_4;
1870       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1871       break;
1872     case 5:
1873       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1874       B->ops->mult            = MatMult_SeqBAIJ_5;
1875       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1876       break;
1877     case 6:
1878       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1879       B->ops->mult            = MatMult_SeqBAIJ_6;
1880       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1881       break;
1882     case 7:
1883       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1884       B->ops->mult            = MatMult_SeqBAIJ_7;
1885       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1886       break;
1887     default:
1888       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1889       B->ops->mult            = MatMult_SeqBAIJ_N;
1890       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1891       break;
1892     }
1893   }
1894   b->bs      = bs;
1895   b->mbs     = mbs;
1896   b->nbs     = nbs;
1897   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1898   if (!nnz) {
1899     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1900     else if (nz <= 0)        nz = 1;
1901     for (i=0; i<mbs; i++) b->imax[i] = nz;
1902     nz = nz*mbs;
1903   } else {
1904     nz = 0;
1905     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1906   }
1907 
1908   /* allocate the matrix space */
1909   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1910   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1911   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1912   b->j  = (int*)(b->a + nz*bs2);
1913   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1914   b->i  = b->j + nz;
1915   b->singlemalloc = PETSC_TRUE;
1916 
1917   b->i[0] = 0;
1918   for (i=1; i<mbs+1; i++) {
1919     b->i[i] = b->i[i-1] + b->imax[i-1];
1920   }
1921 
1922   /* b->ilen will count nonzeros in each block row so far. */
1923   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1924   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1925   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1926 
1927   b->bs               = bs;
1928   b->bs2              = bs2;
1929   b->mbs              = mbs;
1930   b->nz               = 0;
1931   b->maxnz            = nz*bs2;
1932   B->info.nz_unneeded = (PetscReal)b->maxnz;
1933   PetscFunctionReturn(0);
1934 }
1935 
1936