xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 93f8962258a2bb7ba9a67fccb80b97e0174941a3)
1 /*$Id: baij.c,v 1.238 2001/07/16 04:47:34 buschelm Exp buschelm $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "petscsys.h"                     /*I "petscmat.h" I*/
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 
12 /*  UGLY, ugly, ugly
13    When MatScalar == Scalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
14    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
15    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
16    converts the entries into single precision and then calls ..._MatScalar() to put them
17    into the single precision data structures.
18 */
19 #if defined(PETSC_USE_MAT_SINGLE)
20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
21 #else
22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
23 #endif
24 
25 #define CHUNKSIZE  10
26 
27 /*
28      Checks for missing diagonals
29 */
30 #undef __FUNCT__
31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
32 int MatMissingDiagonal_SeqBAIJ(Mat A)
33 {
34   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
35   int         *diag,*jj = a->j,i,ierr;
36 
37   PetscFunctionBegin;
38   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
39   diag = a->diag;
40   for (i=0; i<a->mbs; i++) {
41     if (jj[diag[i]] != i) {
42       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
43     }
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
50 int MatMarkDiagonal_SeqBAIJ(Mat A)
51 {
52   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
53   int         i,j,*diag,m = a->mbs,ierr;
54 
55   PetscFunctionBegin;
56   if (a->diag) PetscFunctionReturn(0);
57 
58   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
59   PetscLogObjectMemory(A,(m+1)*sizeof(int));
60   for (i=0; i<m; i++) {
61     diag[i] = a->i[i+1];
62     for (j=a->i[i]; j<a->i[i+1]; j++) {
63       if (a->j[j] == i) {
64         diag[i] = j;
65         break;
66       }
67     }
68   }
69   a->diag = diag;
70   PetscFunctionReturn(0);
71 }
72 
73 
74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
79 {
80   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
81   int         ierr,n = a->mbs,i;
82 
83   PetscFunctionBegin;
84   *nn = n;
85   if (!ia) PetscFunctionReturn(0);
86   if (symmetric) {
87     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
88   } else if (oshift == 1) {
89     /* temporarily add 1 to i and j indices */
90     int nz = a->i[n];
91     for (i=0; i<nz; i++) a->j[i]++;
92     for (i=0; i<n+1; i++) a->i[i]++;
93     *ia = a->i; *ja = a->j;
94   } else {
95     *ia = a->i; *ja = a->j;
96   }
97 
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
104 {
105   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
106   int         i,n = a->mbs,ierr;
107 
108   PetscFunctionBegin;
109   if (!ia) PetscFunctionReturn(0);
110   if (symmetric) {
111     ierr = PetscFree(*ia);CHKERRQ(ierr);
112     ierr = PetscFree(*ja);CHKERRQ(ierr);
113   } else if (oshift == 1) {
114     int nz = a->i[n]-1;
115     for (i=0; i<nz; i++) a->j[i]--;
116     for (i=0; i<n+1; i++) a->i[i]--;
117   }
118   PetscFunctionReturn(0);
119 }
120 
121 #undef __FUNCT__
122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
124 {
125   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
126 
127   PetscFunctionBegin;
128   *bs = baij->bs;
129   PetscFunctionReturn(0);
130 }
131 
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatDestroy_SeqBAIJ"
135 int MatDestroy_SeqBAIJ(Mat A)
136 {
137   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
138   int         ierr;
139 
140   PetscFunctionBegin;
141 #if defined(PETSC_USE_LOG)
142   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
143 #endif
144   ierr = PetscFree(a->a);CHKERRQ(ierr);
145   if (!a->singlemalloc) {
146     ierr = PetscFree(a->i);CHKERRQ(ierr);
147     ierr = PetscFree(a->j);CHKERRQ(ierr);
148   }
149   if (a->row) {
150     ierr = ISDestroy(a->row);CHKERRQ(ierr);
151   }
152   if (a->col) {
153     ierr = ISDestroy(a->col);CHKERRQ(ierr);
154   }
155   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
156   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
157   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
158   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
159   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
160   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
161   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
162 #if defined(PETSC_USE_MAT_SINGLE)
163   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
164 #endif
165   ierr = PetscFree(a);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatSetOption_SeqBAIJ"
171 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
172 {
173   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
174 
175   PetscFunctionBegin;
176   switch (op) {
177   case MAT_ROW_ORIENTED:
178     a->roworiented    = PETSC_TRUE;
179     break;
180   case MAT_COLUMN_ORIENTED:
181     a->roworiented    = PETSC_FALSE;
182     break;
183   case MAT_COLUMNS_SORTED:
184     a->sorted         = PETSC_TRUE;
185     break;
186   case MAT_COLUMNS_UNSORTED:
187     a->sorted         = PETSC_FALSE;
188     break;
189   case MAT_KEEP_ZEROED_ROWS:
190     a->keepzeroedrows = PETSC_TRUE;
191     break;
192   case MAT_NO_NEW_NONZERO_LOCATIONS:
193     a->nonew          = 1;
194     break;
195   case MAT_NEW_NONZERO_LOCATION_ERR:
196     a->nonew          = -1;
197     break;
198   case MAT_NEW_NONZERO_ALLOCATION_ERR:
199     a->nonew          = -2;
200     break;
201   case MAT_YES_NEW_NONZERO_LOCATIONS:
202     a->nonew          = 0;
203     break;
204   case MAT_ROWS_SORTED:
205   case MAT_ROWS_UNSORTED:
206   case MAT_YES_NEW_DIAGONALS:
207   case MAT_IGNORE_OFF_PROC_ENTRIES:
208   case MAT_USE_HASH_TABLE:
209     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
210     break;
211   case MAT_NO_NEW_DIAGONALS:
212     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
213     break;
214   case MAT_USE_SINGLE_PRECISION_SOLVES:
215     if (a->bs==4) {
216       PetscTruth sse_enabled=PETSC_FALSE;
217       int        ierr;
218       ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr);
219 #if defined(PETSC_HAVE_SSE)
220       if (sse_enabled) {
221           a->single_precision_solves = PETSC_TRUE;
222           A->ops->solve              = MatSolve_SeqBAIJ_Update;
223           A->ops->solvetranspose     = MatSolveTranspose_SeqBAIJ_Update;
224           break;
225       }
226 #else
227       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
228 #endif
229     }
230     break;
231   default:
232     SETERRQ(PETSC_ERR_SUP,"unknown option");
233   }
234   PetscFunctionReturn(0);
235 }
236 
237 #undef __FUNCT__
238 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ"
239 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
240 {
241   PetscFunctionBegin;
242   if (m) *m = 0;
243   if (n) *n = A->m;
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatGetRow_SeqBAIJ"
249 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
250 {
251   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
252   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
253   MatScalar    *aa,*aa_i;
254   Scalar       *v_i;
255 
256   PetscFunctionBegin;
257   bs  = a->bs;
258   ai  = a->i;
259   aj  = a->j;
260   aa  = a->a;
261   bs2 = a->bs2;
262 
263   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
264 
265   bn  = row/bs;   /* Block number */
266   bp  = row % bs; /* Block Position */
267   M   = ai[bn+1] - ai[bn];
268   *nz = bs*M;
269 
270   if (v) {
271     *v = 0;
272     if (*nz) {
273       ierr = PetscMalloc((*nz)*sizeof(Scalar),v);CHKERRQ(ierr);
274       for (i=0; i<M; i++) { /* for each block in the block row */
275         v_i  = *v + i*bs;
276         aa_i = aa + bs2*(ai[bn] + i);
277         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
278       }
279     }
280   }
281 
282   if (idx) {
283     *idx = 0;
284     if (*nz) {
285       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
286       for (i=0; i<M; i++) { /* for each block in the block row */
287         idx_i = *idx + i*bs;
288         itmp  = bs*aj[ai[bn] + i];
289         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
290       }
291     }
292   }
293   PetscFunctionReturn(0);
294 }
295 
296 #undef __FUNCT__
297 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
298 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
299 {
300   int ierr;
301 
302   PetscFunctionBegin;
303   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
304   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
305   PetscFunctionReturn(0);
306 }
307 
308 #undef __FUNCT__
309 #define __FUNCT__ "MatTranspose_SeqBAIJ"
310 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
311 {
312   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
313   Mat         C;
314   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
315   int         *rows,*cols,bs2=a->bs2;
316   Scalar      *array;
317 
318   PetscFunctionBegin;
319   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
320   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
321   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
322 
323 #if defined(PETSC_USE_MAT_SINGLE)
324   ierr = PetscMalloc(a->bs2*a->nz*sizeof(Scalar),&array);CHKERRQ(ierr);
325   for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i];
326 #else
327   array = a->a;
328 #endif
329 
330   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
331   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
332   ierr = PetscFree(col);CHKERRQ(ierr);
333   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
334   cols = rows + bs;
335   for (i=0; i<mbs; i++) {
336     cols[0] = i*bs;
337     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
338     len = ai[i+1] - ai[i];
339     for (j=0; j<len; j++) {
340       rows[0] = (*aj++)*bs;
341       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
342       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
343       array += bs2;
344     }
345   }
346   ierr = PetscFree(rows);CHKERRQ(ierr);
347 #if defined(PETSC_USE_MAT_SINGLE)
348   ierr = PetscFree(array);
349 #endif
350 
351   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
352   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
353 
354   if (B) {
355     *B = C;
356   } else {
357     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
358   }
359   PetscFunctionReturn(0);
360 }
361 
362 #undef __FUNCT__
363 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
364 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
365 {
366   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
367   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
368   Scalar      *aa;
369   FILE        *file;
370 
371   PetscFunctionBegin;
372   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
373   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
374   col_lens[0] = MAT_COOKIE;
375 
376   col_lens[1] = A->m;
377   col_lens[2] = A->n;
378   col_lens[3] = a->nz*bs2;
379 
380   /* store lengths of each row and write (including header) to file */
381   count = 0;
382   for (i=0; i<a->mbs; i++) {
383     for (j=0; j<bs; j++) {
384       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
385     }
386   }
387   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
388   ierr = PetscFree(col_lens);CHKERRQ(ierr);
389 
390   /* store column indices (zero start index) */
391   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
392   count = 0;
393   for (i=0; i<a->mbs; i++) {
394     for (j=0; j<bs; j++) {
395       for (k=a->i[i]; k<a->i[i+1]; k++) {
396         for (l=0; l<bs; l++) {
397           jj[count++] = bs*a->j[k] + l;
398         }
399       }
400     }
401   }
402   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
403   ierr = PetscFree(jj);CHKERRQ(ierr);
404 
405   /* store nonzero values */
406   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr);
407   count = 0;
408   for (i=0; i<a->mbs; i++) {
409     for (j=0; j<bs; j++) {
410       for (k=a->i[i]; k<a->i[i+1]; k++) {
411         for (l=0; l<bs; l++) {
412           aa[count++] = a->a[bs2*k + l*bs + j];
413         }
414       }
415     }
416   }
417   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
418   ierr = PetscFree(aa);CHKERRQ(ierr);
419 
420   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
421   if (file) {
422     fprintf(file,"-matload_block_size %d\n",a->bs);
423   }
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNCT__
428 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
429 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
430 {
431   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
432   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
433   PetscViewerFormat format;
434 
435   PetscFunctionBegin;
436   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
437   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
438     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
439   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
440     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
441   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
442     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
443     for (i=0; i<a->mbs; i++) {
444       for (j=0; j<bs; j++) {
445         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
446         for (k=a->i[i]; k<a->i[i+1]; k++) {
447           for (l=0; l<bs; l++) {
448 #if defined(PETSC_USE_COMPLEX)
449             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
450               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l,
451                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
452             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
453               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l,
454                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
455             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
456               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
457             }
458 #else
459             if (a->a[bs2*k + l*bs + j] != 0.0) {
460               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
461             }
462 #endif
463           }
464         }
465         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
466       }
467     }
468     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
469   } else {
470     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
471     for (i=0; i<a->mbs; i++) {
472       for (j=0; j<bs; j++) {
473         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
474         for (k=a->i[i]; k<a->i[i+1]; k++) {
475           for (l=0; l<bs; l++) {
476 #if defined(PETSC_USE_COMPLEX)
477             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
478               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
479                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
480             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
481               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
482                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
483             } else {
484               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
485             }
486 #else
487             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
488 #endif
489           }
490         }
491         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
492       }
493     }
494     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
495   }
496   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
502 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
503 {
504   Mat          A = (Mat) Aa;
505   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
506   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
507   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
508   MatScalar    *aa;
509   PetscViewer  viewer;
510 
511   PetscFunctionBegin;
512 
513   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
514   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
515 
516   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
517 
518   /* loop over matrix elements drawing boxes */
519   color = PETSC_DRAW_BLUE;
520   for (i=0,row=0; i<mbs; i++,row+=bs) {
521     for (j=a->i[i]; j<a->i[i+1]; j++) {
522       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
523       x_l = a->j[j]*bs; x_r = x_l + 1.0;
524       aa = a->a + j*bs2;
525       for (k=0; k<bs; k++) {
526         for (l=0; l<bs; l++) {
527           if (PetscRealPart(*aa++) >=  0.) continue;
528           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
529         }
530       }
531     }
532   }
533   color = PETSC_DRAW_CYAN;
534   for (i=0,row=0; i<mbs; i++,row+=bs) {
535     for (j=a->i[i]; j<a->i[i+1]; j++) {
536       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
537       x_l = a->j[j]*bs; x_r = x_l + 1.0;
538       aa = a->a + j*bs2;
539       for (k=0; k<bs; k++) {
540         for (l=0; l<bs; l++) {
541           if (PetscRealPart(*aa++) != 0.) continue;
542           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
543         }
544       }
545     }
546   }
547 
548   color = PETSC_DRAW_RED;
549   for (i=0,row=0; i<mbs; i++,row+=bs) {
550     for (j=a->i[i]; j<a->i[i+1]; j++) {
551       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
552       x_l = a->j[j]*bs; x_r = x_l + 1.0;
553       aa = a->a + j*bs2;
554       for (k=0; k<bs; k++) {
555         for (l=0; l<bs; l++) {
556           if (PetscRealPart(*aa++) <= 0.) continue;
557           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
558         }
559       }
560     }
561   }
562   PetscFunctionReturn(0);
563 }
564 
565 #undef __FUNCT__
566 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
567 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
568 {
569   int          ierr;
570   PetscReal    xl,yl,xr,yr,w,h;
571   PetscDraw    draw;
572   PetscTruth   isnull;
573 
574   PetscFunctionBegin;
575 
576   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
577   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
578 
579   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
580   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
581   xr += w;    yr += h;  xl = -w;     yl = -h;
582   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
583   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
584   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
585   PetscFunctionReturn(0);
586 }
587 
588 #undef __FUNCT__
589 #define __FUNCT__ "MatView_SeqBAIJ"
590 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
591 {
592   int        ierr;
593   PetscTruth issocket,isascii,isbinary,isdraw;
594 
595   PetscFunctionBegin;
596   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
597   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
598   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
599   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
600   if (issocket) {
601     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
602   } else if (isascii){
603     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
604   } else if (isbinary) {
605     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
606   } else if (isdraw) {
607     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
608   } else {
609     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
610   }
611   PetscFunctionReturn(0);
612 }
613 
614 
615 #undef __FUNCT__
616 #define __FUNCT__ "MatGetValues_SeqBAIJ"
617 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
618 {
619   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
620   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
621   int        *ai = a->i,*ailen = a->ilen;
622   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
623   MatScalar  *ap,*aa = a->a,zero = 0.0;
624 
625   PetscFunctionBegin;
626   for (k=0; k<m; k++) { /* loop over rows */
627     row  = im[k]; brow = row/bs;
628     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
629     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
630     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
631     nrow = ailen[brow];
632     for (l=0; l<n; l++) { /* loop over columns */
633       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
634       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
635       col  = in[l] ;
636       bcol = col/bs;
637       cidx = col%bs;
638       ridx = row%bs;
639       high = nrow;
640       low  = 0; /* assume unsorted */
641       while (high-low > 5) {
642         t = (low+high)/2;
643         if (rp[t] > bcol) high = t;
644         else             low  = t;
645       }
646       for (i=low; i<high; i++) {
647         if (rp[i] > bcol) break;
648         if (rp[i] == bcol) {
649           *v++ = ap[bs2*i+bs*cidx+ridx];
650           goto finished;
651         }
652       }
653       *v++ = zero;
654       finished:;
655     }
656   }
657   PetscFunctionReturn(0);
658 }
659 
660 #if defined(PETSC_USE_MAT_SINGLE)
661 #undef __FUNCT__
662 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
663 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
664 {
665   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
666   int         ierr,i,N = m*n*b->bs2;
667   MatScalar   *vsingle;
668 
669   PetscFunctionBegin;
670   if (N > b->setvalueslen) {
671     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
672     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
673     b->setvalueslen  = N;
674   }
675   vsingle = b->setvaluescopy;
676   for (i=0; i<N; i++) {
677     vsingle[i] = v[i];
678   }
679   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
680   PetscFunctionReturn(0);
681 }
682 #endif
683 
684 
685 #undef __FUNCT__
686 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
687 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
688 {
689   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
690   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
691   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
692   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
693   PetscTruth  roworiented=a->roworiented;
694   MatScalar   *value = v,*ap,*aa = a->a,*bap;
695 
696   PetscFunctionBegin;
697   if (roworiented) {
698     stepval = (n-1)*bs;
699   } else {
700     stepval = (m-1)*bs;
701   }
702   for (k=0; k<m; k++) { /* loop over added rows */
703     row  = im[k];
704     if (row < 0) continue;
705 #if defined(PETSC_USE_BOPT_g)
706     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
707 #endif
708     rp   = aj + ai[row];
709     ap   = aa + bs2*ai[row];
710     rmax = imax[row];
711     nrow = ailen[row];
712     low  = 0;
713     for (l=0; l<n; l++) { /* loop over added columns */
714       if (in[l] < 0) continue;
715 #if defined(PETSC_USE_BOPT_g)
716       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
717 #endif
718       col = in[l];
719       if (roworiented) {
720         value = v + k*(stepval+bs)*bs + l*bs;
721       } else {
722         value = v + l*(stepval+bs)*bs + k*bs;
723       }
724       if (!sorted) low = 0; high = nrow;
725       while (high-low > 7) {
726         t = (low+high)/2;
727         if (rp[t] > col) high = t;
728         else             low  = t;
729       }
730       for (i=low; i<high; i++) {
731         if (rp[i] > col) break;
732         if (rp[i] == col) {
733           bap  = ap +  bs2*i;
734           if (roworiented) {
735             if (is == ADD_VALUES) {
736               for (ii=0; ii<bs; ii++,value+=stepval) {
737                 for (jj=ii; jj<bs2; jj+=bs) {
738                   bap[jj] += *value++;
739                 }
740               }
741             } else {
742               for (ii=0; ii<bs; ii++,value+=stepval) {
743                 for (jj=ii; jj<bs2; jj+=bs) {
744                   bap[jj] = *value++;
745                 }
746               }
747             }
748           } else {
749             if (is == ADD_VALUES) {
750               for (ii=0; ii<bs; ii++,value+=stepval) {
751                 for (jj=0; jj<bs; jj++) {
752                   *bap++ += *value++;
753                 }
754               }
755             } else {
756               for (ii=0; ii<bs; ii++,value+=stepval) {
757                 for (jj=0; jj<bs; jj++) {
758                   *bap++  = *value++;
759                 }
760               }
761             }
762           }
763           goto noinsert2;
764         }
765       }
766       if (nonew == 1) goto noinsert2;
767       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
768       if (nrow >= rmax) {
769         /* there is no extra room in row, therefore enlarge */
770         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
771         MatScalar *new_a;
772 
773         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
774 
775         /* malloc new storage space */
776         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
777 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
778         new_j   = (int*)(new_a + bs2*new_nz);
779         new_i   = new_j + new_nz;
780 
781         /* copy over old data into new slots */
782         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
783         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
784         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
785         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
786         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
787         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
788         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
789         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
790         /* free up old matrix storage */
791         ierr = PetscFree(a->a);CHKERRQ(ierr);
792         if (!a->singlemalloc) {
793           ierr = PetscFree(a->i);CHKERRQ(ierr);
794           ierr = PetscFree(a->j);CHKERRQ(ierr);
795         }
796         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
797         a->singlemalloc = PETSC_TRUE;
798 
799         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
800         rmax = imax[row] = imax[row] + CHUNKSIZE;
801         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
802         a->maxnz += bs2*CHUNKSIZE;
803         a->reallocs++;
804         a->nz++;
805       }
806       N = nrow++ - 1;
807       /* shift up all the later entries in this row */
808       for (ii=N; ii>=i; ii--) {
809         rp[ii+1] = rp[ii];
810         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
811       }
812       if (N >= i) {
813         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
814       }
815       rp[i] = col;
816       bap   = ap +  bs2*i;
817       if (roworiented) {
818         for (ii=0; ii<bs; ii++,value+=stepval) {
819           for (jj=ii; jj<bs2; jj+=bs) {
820             bap[jj] = *value++;
821           }
822         }
823       } else {
824         for (ii=0; ii<bs; ii++,value+=stepval) {
825           for (jj=0; jj<bs; jj++) {
826             *bap++  = *value++;
827           }
828         }
829       }
830       noinsert2:;
831       low = i;
832     }
833     ailen[row] = nrow;
834   }
835   PetscFunctionReturn(0);
836 }
837 
838 
839 #undef __FUNCT__
840 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
841 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
842 {
843   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
844   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
845   int        m = A->m,*ip,N,*ailen = a->ilen;
846   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
847   MatScalar  *aa = a->a,*ap;
848 
849   PetscFunctionBegin;
850   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
851 
852   if (m) rmax = ailen[0];
853   for (i=1; i<mbs; i++) {
854     /* move each row back by the amount of empty slots (fshift) before it*/
855     fshift += imax[i-1] - ailen[i-1];
856     rmax   = PetscMax(rmax,ailen[i]);
857     if (fshift) {
858       ip = aj + ai[i]; ap = aa + bs2*ai[i];
859       N = ailen[i];
860       for (j=0; j<N; j++) {
861         ip[j-fshift] = ip[j];
862         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
863       }
864     }
865     ai[i] = ai[i-1] + ailen[i-1];
866   }
867   if (mbs) {
868     fshift += imax[mbs-1] - ailen[mbs-1];
869     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
870   }
871   /* reset ilen and imax for each row */
872   for (i=0; i<mbs; i++) {
873     ailen[i] = imax[i] = ai[i+1] - ai[i];
874   }
875   a->nz = ai[mbs];
876 
877   /* diagonals may have moved, so kill the diagonal pointers */
878   if (fshift && a->diag) {
879     ierr = PetscFree(a->diag);CHKERRQ(ierr);
880     PetscLogObjectMemory(A,-(m+1)*sizeof(int));
881     a->diag = 0;
882   }
883   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
884            m,A->n,a->bs,fshift*bs2,a->nz*bs2);
885   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
886            a->reallocs);
887   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
888   a->reallocs          = 0;
889   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
890 
891   PetscFunctionReturn(0);
892 }
893 
894 
895 
896 /*
897    This function returns an array of flags which indicate the locations of contiguous
898    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
899    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
900    Assume: sizes should be long enough to hold all the values.
901 */
902 #undef __FUNCT__
903 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
904 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
905 {
906   int        i,j,k,row;
907   PetscTruth flg;
908 
909   PetscFunctionBegin;
910   for (i=0,j=0; i<n; j++) {
911     row = idx[i];
912     if (row%bs!=0) { /* Not the begining of a block */
913       sizes[j] = 1;
914       i++;
915     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
916       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
917       i++;
918     } else { /* Begining of the block, so check if the complete block exists */
919       flg = PETSC_TRUE;
920       for (k=1; k<bs; k++) {
921         if (row+k != idx[i+k]) { /* break in the block */
922           flg = PETSC_FALSE;
923           break;
924         }
925       }
926       if (flg == PETSC_TRUE) { /* No break in the bs */
927         sizes[j] = bs;
928         i+= bs;
929       } else {
930         sizes[j] = 1;
931         i++;
932       }
933     }
934   }
935   *bs_max = j;
936   PetscFunctionReturn(0);
937 }
938 
939 #undef __FUNCT__
940 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
941 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag)
942 {
943   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
944   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
945   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
946   Scalar      zero = 0.0;
947   MatScalar   *aa;
948 
949   PetscFunctionBegin;
950   /* Make a copy of the IS and  sort it */
951   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
952   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
953 
954   /* allocate memory for rows,sizes */
955   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
956   sizes = rows + is_n;
957 
958   /* copy IS values to rows, and sort them */
959   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
960   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
961   if (baij->keepzeroedrows) {
962     for (i=0; i<is_n; i++) { sizes[i] = 1; }
963     bs_max = is_n;
964   } else {
965     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
966   }
967   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
968 
969   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
970     row   = rows[j];
971     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
972     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
973     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
974     if (sizes[i] == bs && !baij->keepzeroedrows) {
975       if (diag) {
976         if (baij->ilen[row/bs] > 0) {
977           baij->ilen[row/bs]       = 1;
978           baij->j[baij->i[row/bs]] = row/bs;
979           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
980         }
981         /* Now insert all the diagonal values for this bs */
982         for (k=0; k<bs; k++) {
983           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
984         }
985       } else { /* (!diag) */
986         baij->ilen[row/bs] = 0;
987       } /* end (!diag) */
988     } else { /* (sizes[i] != bs) */
989 #if defined (PETSC_USE_DEBUG)
990       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
991 #endif
992       for (k=0; k<count; k++) {
993         aa[0] =  zero;
994         aa    += bs;
995       }
996       if (diag) {
997         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
998       }
999     }
1000   }
1001 
1002   ierr = PetscFree(rows);CHKERRQ(ierr);
1003   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1009 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
1010 {
1011   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1012   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1013   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1014   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1015   int         ridx,cidx,bs2=a->bs2,ierr;
1016   PetscTruth  roworiented=a->roworiented;
1017   MatScalar   *ap,value,*aa=a->a,*bap;
1018 
1019   PetscFunctionBegin;
1020   for (k=0; k<m; k++) { /* loop over added rows */
1021     row  = im[k]; brow = row/bs;
1022     if (row < 0) continue;
1023 #if defined(PETSC_USE_BOPT_g)
1024     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1025 #endif
1026     rp   = aj + ai[brow];
1027     ap   = aa + bs2*ai[brow];
1028     rmax = imax[brow];
1029     nrow = ailen[brow];
1030     low  = 0;
1031     for (l=0; l<n; l++) { /* loop over added columns */
1032       if (in[l] < 0) continue;
1033 #if defined(PETSC_USE_BOPT_g)
1034       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1035 #endif
1036       col = in[l]; bcol = col/bs;
1037       ridx = row % bs; cidx = col % bs;
1038       if (roworiented) {
1039         value = v[l + k*n];
1040       } else {
1041         value = v[k + l*m];
1042       }
1043       if (!sorted) low = 0; high = nrow;
1044       while (high-low > 7) {
1045         t = (low+high)/2;
1046         if (rp[t] > bcol) high = t;
1047         else              low  = t;
1048       }
1049       for (i=low; i<high; i++) {
1050         if (rp[i] > bcol) break;
1051         if (rp[i] == bcol) {
1052           bap  = ap +  bs2*i + bs*cidx + ridx;
1053           if (is == ADD_VALUES) *bap += value;
1054           else                  *bap  = value;
1055           goto noinsert1;
1056         }
1057       }
1058       if (nonew == 1) goto noinsert1;
1059       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1060       if (nrow >= rmax) {
1061         /* there is no extra room in row, therefore enlarge */
1062         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1063         MatScalar *new_a;
1064 
1065         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1066 
1067         /* Malloc new storage space */
1068         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1069 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1070         new_j   = (int*)(new_a + bs2*new_nz);
1071         new_i   = new_j + new_nz;
1072 
1073         /* copy over old data into new slots */
1074         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1075         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1076         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1077         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1078         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1079         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1080         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1081         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1082         /* free up old matrix storage */
1083         ierr = PetscFree(a->a);CHKERRQ(ierr);
1084         if (!a->singlemalloc) {
1085           ierr = PetscFree(a->i);CHKERRQ(ierr);
1086           ierr = PetscFree(a->j);CHKERRQ(ierr);
1087         }
1088         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1089         a->singlemalloc = PETSC_TRUE;
1090 
1091         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1092         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1093         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1094         a->maxnz += bs2*CHUNKSIZE;
1095         a->reallocs++;
1096         a->nz++;
1097       }
1098       N = nrow++ - 1;
1099       /* shift up all the later entries in this row */
1100       for (ii=N; ii>=i; ii--) {
1101         rp[ii+1] = rp[ii];
1102         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1103       }
1104       if (N>=i) {
1105         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1106       }
1107       rp[i]                      = bcol;
1108       ap[bs2*i + bs*cidx + ridx] = value;
1109       noinsert1:;
1110       low = i;
1111     }
1112     ailen[brow] = nrow;
1113   }
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1120 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1121 {
1122   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1123   Mat         outA;
1124   int         ierr;
1125   PetscTruth  row_identity,col_identity;
1126 
1127   PetscFunctionBegin;
1128   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1129   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1130   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1131   if (!row_identity || !col_identity) {
1132     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1133   }
1134 
1135   outA          = inA;
1136   inA->factor   = FACTOR_LU;
1137 
1138   if (!a->diag) {
1139     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1140   }
1141   /*
1142       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1143       for ILU(0) factorization with natural ordering
1144   */
1145   if (a->bs < 8) {
1146     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1147   } else {
1148     a->row        = row;
1149     a->col        = col;
1150     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1151     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1152 
1153     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1154     ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1155     PetscLogObjectParent(inA,a->icol);
1156 
1157     if (!a->solve_work) {
1158       ierr = PetscMalloc((inA->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr);
1159       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(Scalar));
1160     }
1161   }
1162 
1163   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1164 
1165   PetscFunctionReturn(0);
1166 }
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1169 int MatPrintHelp_SeqBAIJ(Mat A)
1170 {
1171   static PetscTruth called = PETSC_FALSE;
1172   MPI_Comm          comm = A->comm;
1173   int               ierr;
1174 
1175   PetscFunctionBegin;
1176   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1177   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1178   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 EXTERN_C_BEGIN
1183 #undef __FUNCT__
1184 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1185 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1186 {
1187   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1188   int         i,nz,n;
1189 
1190   PetscFunctionBegin;
1191   nz = baij->maxnz;
1192   n  = mat->n;
1193   for (i=0; i<nz; i++) {
1194     baij->j[i] = indices[i];
1195   }
1196   baij->nz = nz;
1197   for (i=0; i<n; i++) {
1198     baij->ilen[i] = baij->imax[i];
1199   }
1200 
1201   PetscFunctionReturn(0);
1202 }
1203 EXTERN_C_END
1204 
1205 #undef __FUNCT__
1206 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1207 /*@
1208     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1209        in the matrix.
1210 
1211   Input Parameters:
1212 +  mat - the SeqBAIJ matrix
1213 -  indices - the column indices
1214 
1215   Level: advanced
1216 
1217   Notes:
1218     This can be called if you have precomputed the nonzero structure of the
1219   matrix and want to provide it to the matrix object to improve the performance
1220   of the MatSetValues() operation.
1221 
1222     You MUST have set the correct numbers of nonzeros per row in the call to
1223   MatCreateSeqBAIJ().
1224 
1225     MUST be called before any calls to MatSetValues();
1226 
1227 @*/
1228 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1229 {
1230   int ierr,(*f)(Mat,int *);
1231 
1232   PetscFunctionBegin;
1233   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1234   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr);
1235   if (f) {
1236     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1237   } else {
1238     SETERRQ(1,"Wrong type of matrix to set column indices");
1239   }
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 #undef __FUNCT__
1244 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1245 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1246 {
1247   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1248   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1249   PetscReal    atmp;
1250   Scalar       *x,zero = 0.0;
1251   MatScalar    *aa;
1252   int          ncols,brow,krow,kcol;
1253 
1254   PetscFunctionBegin;
1255   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1256   bs   = a->bs;
1257   aa   = a->a;
1258   ai   = a->i;
1259   aj   = a->j;
1260   mbs = a->mbs;
1261 
1262   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1263   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1264   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1265   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1266   for (i=0; i<mbs; i++) {
1267     ncols = ai[1] - ai[0]; ai++;
1268     brow  = bs*i;
1269     for (j=0; j<ncols; j++){
1270       /* bcol = bs*(*aj); */
1271       for (kcol=0; kcol<bs; kcol++){
1272         for (krow=0; krow<bs; krow++){
1273           atmp = PetscAbsScalar(*aa); aa++;
1274           row = brow + krow;    /* row index */
1275           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1276           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1277         }
1278       }
1279       aj++;
1280     }
1281   }
1282   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 #undef __FUNCT__
1287 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1288 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1289 {
1290   int        ierr;
1291 
1292   PetscFunctionBegin;
1293   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 #undef __FUNCT__
1298 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1299 int MatGetArray_SeqBAIJ(Mat A,Scalar **array)
1300 {
1301   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1302   PetscFunctionBegin;
1303   *array = a->a;
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1309 int MatRestoreArray_SeqBAIJ(Mat A,Scalar **array)
1310 {
1311   PetscFunctionBegin;
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 /* -------------------------------------------------------------------*/
1316 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1317        MatGetRow_SeqBAIJ,
1318        MatRestoreRow_SeqBAIJ,
1319        MatMult_SeqBAIJ_N,
1320        MatMultAdd_SeqBAIJ_N,
1321        MatMultTranspose_SeqBAIJ,
1322        MatMultTransposeAdd_SeqBAIJ,
1323        MatSolve_SeqBAIJ_N,
1324        0,
1325        0,
1326        0,
1327        MatLUFactor_SeqBAIJ,
1328        0,
1329        0,
1330        MatTranspose_SeqBAIJ,
1331        MatGetInfo_SeqBAIJ,
1332        MatEqual_SeqBAIJ,
1333        MatGetDiagonal_SeqBAIJ,
1334        MatDiagonalScale_SeqBAIJ,
1335        MatNorm_SeqBAIJ,
1336        0,
1337        MatAssemblyEnd_SeqBAIJ,
1338        0,
1339        MatSetOption_SeqBAIJ,
1340        MatZeroEntries_SeqBAIJ,
1341        MatZeroRows_SeqBAIJ,
1342        MatLUFactorSymbolic_SeqBAIJ,
1343        MatLUFactorNumeric_SeqBAIJ_N,
1344        0,
1345        0,
1346        MatSetUpPreallocation_SeqBAIJ,
1347        0,
1348        MatGetOwnershipRange_SeqBAIJ,
1349        MatILUFactorSymbolic_SeqBAIJ,
1350        0,
1351        MatGetArray_SeqBAIJ,
1352        MatRestoreArray_SeqBAIJ,
1353        MatDuplicate_SeqBAIJ,
1354        0,
1355        0,
1356        MatILUFactor_SeqBAIJ,
1357        0,
1358        0,
1359        MatGetSubMatrices_SeqBAIJ,
1360        MatIncreaseOverlap_SeqBAIJ,
1361        MatGetValues_SeqBAIJ,
1362        0,
1363        MatPrintHelp_SeqBAIJ,
1364        MatScale_SeqBAIJ,
1365        0,
1366        0,
1367        0,
1368        MatGetBlockSize_SeqBAIJ,
1369        MatGetRowIJ_SeqBAIJ,
1370        MatRestoreRowIJ_SeqBAIJ,
1371        0,
1372        0,
1373        0,
1374        0,
1375        0,
1376        0,
1377        MatSetValuesBlocked_SeqBAIJ,
1378        MatGetSubMatrix_SeqBAIJ,
1379        MatDestroy_SeqBAIJ,
1380        MatView_SeqBAIJ,
1381        MatGetMaps_Petsc,
1382        0,
1383        0,
1384        0,
1385        0,
1386        0,
1387        0,
1388        MatGetRowMax_SeqBAIJ,
1389        MatConvert_Basic};
1390 
1391 EXTERN_C_BEGIN
1392 #undef __FUNCT__
1393 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1394 int MatStoreValues_SeqBAIJ(Mat mat)
1395 {
1396   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1397   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1398   int         ierr;
1399 
1400   PetscFunctionBegin;
1401   if (aij->nonew != 1) {
1402     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1403   }
1404 
1405   /* allocate space for values if not already there */
1406   if (!aij->saved_values) {
1407     ierr = PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr);
1408   }
1409 
1410   /* copy values over */
1411   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1412   PetscFunctionReturn(0);
1413 }
1414 EXTERN_C_END
1415 
1416 EXTERN_C_BEGIN
1417 #undef __FUNCT__
1418 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1419 int MatRetrieveValues_SeqBAIJ(Mat mat)
1420 {
1421   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1422   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1423 
1424   PetscFunctionBegin;
1425   if (aij->nonew != 1) {
1426     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1427   }
1428   if (!aij->saved_values) {
1429     SETERRQ(1,"Must call MatStoreValues(A);first");
1430   }
1431 
1432   /* copy values over */
1433   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1434   PetscFunctionReturn(0);
1435 }
1436 EXTERN_C_END
1437 
1438 EXTERN_C_BEGIN
1439 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1440 EXTERN_C_END
1441 
1442 EXTERN_C_BEGIN
1443 #undef __FUNCT__
1444 #define __FUNCT__ "MatCreate_SeqBAIJ"
1445 int MatCreate_SeqBAIJ(Mat B)
1446 {
1447   int         ierr,size;
1448   Mat_SeqBAIJ *b;
1449 
1450   PetscFunctionBegin;
1451   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1452   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1453 
1454   B->m = B->M = PetscMax(B->m,B->M);
1455   B->n = B->N = PetscMax(B->n,B->N);
1456   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1457   B->data = (void*)b;
1458   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1459   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1460   B->factor           = 0;
1461   B->lupivotthreshold = 1.0;
1462   B->mapping          = 0;
1463   b->row              = 0;
1464   b->col              = 0;
1465   b->icol             = 0;
1466   b->reallocs         = 0;
1467   b->saved_values     = 0;
1468 #if defined(PETSC_USE_MAT_SINGLE)
1469   b->setvalueslen     = 0;
1470   b->setvaluescopy    = PETSC_NULL;
1471 #endif
1472   b->single_precision_solves = PETSC_FALSE;
1473 
1474   ierr = MapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1475   ierr = MapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1476 
1477   b->sorted           = PETSC_FALSE;
1478   b->roworiented      = PETSC_TRUE;
1479   b->nonew            = 0;
1480   b->diag             = 0;
1481   b->solve_work       = 0;
1482   b->mult_work        = 0;
1483   b->spptr            = 0;
1484   B->info.nz_unneeded = (PetscReal)b->maxnz;
1485   b->keepzeroedrows   = PETSC_FALSE;
1486 
1487   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1488                                      "MatStoreValues_SeqBAIJ",
1489                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1490   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1491                                      "MatRetrieveValues_SeqBAIJ",
1492                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1493   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1494                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1495                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1496   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1497                                      "MatConvert_SeqBAIJ_SeqAIJ",
1498                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1499   PetscFunctionReturn(0);
1500 }
1501 EXTERN_C_END
1502 
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1505 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1506 {
1507   Mat         C;
1508   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1509   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1510 
1511   PetscFunctionBegin;
1512   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1513 
1514   *B = 0;
1515   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1516   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1517   c    = (Mat_SeqBAIJ*)C->data;
1518 
1519   c->bs         = a->bs;
1520   c->bs2        = a->bs2;
1521   c->mbs        = a->mbs;
1522   c->nbs        = a->nbs;
1523   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1524 
1525   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1526   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1527   for (i=0; i<mbs; i++) {
1528     c->imax[i] = a->imax[i];
1529     c->ilen[i] = a->ilen[i];
1530   }
1531 
1532   /* allocate the matrix space */
1533   c->singlemalloc = PETSC_TRUE;
1534   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1535   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1536   c->j = (int*)(c->a + nz*bs2);
1537   c->i = c->j + nz;
1538   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1539   if (mbs > 0) {
1540     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1541     if (cpvalues == MAT_COPY_VALUES) {
1542       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1543     } else {
1544       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1545     }
1546   }
1547 
1548   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1549   c->sorted      = a->sorted;
1550   c->roworiented = a->roworiented;
1551   c->nonew       = a->nonew;
1552 
1553   if (a->diag) {
1554     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1555     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1556     for (i=0; i<mbs; i++) {
1557       c->diag[i] = a->diag[i];
1558     }
1559   } else c->diag        = 0;
1560   c->nz                 = a->nz;
1561   c->maxnz              = a->maxnz;
1562   c->solve_work         = 0;
1563   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1564   c->mult_work          = 0;
1565   C->preallocated       = PETSC_TRUE;
1566   C->assembled          = PETSC_TRUE;
1567   *B = C;
1568   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1569   PetscFunctionReturn(0);
1570 }
1571 
1572 EXTERN_C_BEGIN
1573 #undef __FUNCT__
1574 #define __FUNCT__ "MatLoad_SeqBAIJ"
1575 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1576 {
1577   Mat_SeqBAIJ  *a;
1578   Mat          B;
1579   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1580   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1581   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1582   int          *masked,nmask,tmp,bs2,ishift;
1583   Scalar       *aa;
1584   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1585 
1586   PetscFunctionBegin;
1587   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1588   bs2  = bs*bs;
1589 
1590   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1591   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1592   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1593   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1594   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1595   M = header[1]; N = header[2]; nz = header[3];
1596 
1597   if (header[3] < 0) {
1598     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1599   }
1600 
1601   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1602 
1603   /*
1604      This code adds extra rows to make sure the number of rows is
1605     divisible by the blocksize
1606   */
1607   mbs        = M/bs;
1608   extra_rows = bs - M + bs*(mbs);
1609   if (extra_rows == bs) extra_rows = 0;
1610   else                  mbs++;
1611   if (extra_rows) {
1612     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1613   }
1614 
1615   /* read in row lengths */
1616   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1617   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1618   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1619 
1620   /* read in column indices */
1621   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1622   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1623   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1624 
1625   /* loop over row lengths determining block row lengths */
1626   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1627   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1628   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1629   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1630   masked   = mask + mbs;
1631   rowcount = 0; nzcount = 0;
1632   for (i=0; i<mbs; i++) {
1633     nmask = 0;
1634     for (j=0; j<bs; j++) {
1635       kmax = rowlengths[rowcount];
1636       for (k=0; k<kmax; k++) {
1637         tmp = jj[nzcount++]/bs;
1638         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1639       }
1640       rowcount++;
1641     }
1642     browlengths[i] += nmask;
1643     /* zero out the mask elements we set */
1644     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1645   }
1646 
1647   /* create our matrix */
1648   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1649   B = *A;
1650   a = (Mat_SeqBAIJ*)B->data;
1651 
1652   /* set matrix "i" values */
1653   a->i[0] = 0;
1654   for (i=1; i<= mbs; i++) {
1655     a->i[i]      = a->i[i-1] + browlengths[i-1];
1656     a->ilen[i-1] = browlengths[i-1];
1657   }
1658   a->nz         = 0;
1659   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1660 
1661   /* read in nonzero values */
1662   ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr);
1663   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1664   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1665 
1666   /* set "a" and "j" values into matrix */
1667   nzcount = 0; jcount = 0;
1668   for (i=0; i<mbs; i++) {
1669     nzcountb = nzcount;
1670     nmask    = 0;
1671     for (j=0; j<bs; j++) {
1672       kmax = rowlengths[i*bs+j];
1673       for (k=0; k<kmax; k++) {
1674         tmp = jj[nzcount++]/bs;
1675 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1676       }
1677     }
1678     /* sort the masked values */
1679     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1680 
1681     /* set "j" values into matrix */
1682     maskcount = 1;
1683     for (j=0; j<nmask; j++) {
1684       a->j[jcount++]  = masked[j];
1685       mask[masked[j]] = maskcount++;
1686     }
1687     /* set "a" values into matrix */
1688     ishift = bs2*a->i[i];
1689     for (j=0; j<bs; j++) {
1690       kmax = rowlengths[i*bs+j];
1691       for (k=0; k<kmax; k++) {
1692         tmp       = jj[nzcountb]/bs ;
1693         block     = mask[tmp] - 1;
1694         point     = jj[nzcountb] - bs*tmp;
1695         idx       = ishift + bs2*block + j + bs*point;
1696         a->a[idx] = (MatScalar)aa[nzcountb++];
1697       }
1698     }
1699     /* zero out the mask elements we set */
1700     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1701   }
1702   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1703 
1704   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1705   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1706   ierr = PetscFree(aa);CHKERRQ(ierr);
1707   ierr = PetscFree(jj);CHKERRQ(ierr);
1708   ierr = PetscFree(mask);CHKERRQ(ierr);
1709 
1710   B->assembled = PETSC_TRUE;
1711 
1712   ierr = MatView_Private(B);CHKERRQ(ierr);
1713   PetscFunctionReturn(0);
1714 }
1715 EXTERN_C_END
1716 
1717 #undef __FUNCT__
1718 #define __FUNCT__ "MatCreateSeqBAIJ"
1719 /*@C
1720    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1721    compressed row) format.  For good matrix assembly performance the
1722    user should preallocate the matrix storage by setting the parameter nz
1723    (or the array nnz).  By setting these parameters accurately, performance
1724    during matrix assembly can be increased by more than a factor of 50.
1725 
1726    Collective on MPI_Comm
1727 
1728    Input Parameters:
1729 +  comm - MPI communicator, set to PETSC_COMM_SELF
1730 .  bs - size of block
1731 .  m - number of rows
1732 .  n - number of columns
1733 .  nz - number of nonzero blocks  per block row (same for all rows)
1734 -  nnz - array containing the number of nonzero blocks in the various block rows
1735          (possibly different for each block row) or PETSC_NULL
1736 
1737    Output Parameter:
1738 .  A - the matrix
1739 
1740    Options Database Keys:
1741 .   -mat_no_unroll - uses code that does not unroll the loops in the
1742                      block calculations (much slower)
1743 .    -mat_block_size - size of the blocks to use
1744 
1745    Level: intermediate
1746 
1747    Notes:
1748    A nonzero block is any block that as 1 or more nonzeros in it
1749 
1750    The block AIJ format is fully compatible with standard Fortran 77
1751    storage.  That is, the stored row and column indices can begin at
1752    either one (as in Fortran) or zero.  See the users' manual for details.
1753 
1754    Specify the preallocated storage with either nz or nnz (not both).
1755    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1756    allocation.  For additional details, see the users manual chapter on
1757    matrices.
1758 
1759 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1760 @*/
1761 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1762 {
1763   int ierr;
1764 
1765   PetscFunctionBegin;
1766   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1767   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
1768   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 #undef __FUNCT__
1773 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
1774 /*@C
1775    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1776    per row in the matrix. For good matrix assembly performance the
1777    user should preallocate the matrix storage by setting the parameter nz
1778    (or the array nnz).  By setting these parameters accurately, performance
1779    during matrix assembly can be increased by more than a factor of 50.
1780 
1781    Collective on MPI_Comm
1782 
1783    Input Parameters:
1784 +  A - the matrix
1785 .  bs - size of block
1786 .  nz - number of block nonzeros per block row (same for all rows)
1787 -  nnz - array containing the number of block nonzeros in the various block rows
1788          (possibly different for each block row) or PETSC_NULL
1789 
1790    Options Database Keys:
1791 .   -mat_no_unroll - uses code that does not unroll the loops in the
1792                      block calculations (much slower)
1793 .    -mat_block_size - size of the blocks to use
1794 
1795    Level: intermediate
1796 
1797    Notes:
1798    The block AIJ format is fully compatible with standard Fortran 77
1799    storage.  That is, the stored row and column indices can begin at
1800    either one (as in Fortran) or zero.  See the users' manual for details.
1801 
1802    Specify the preallocated storage with either nz or nnz (not both).
1803    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1804    allocation.  For additional details, see the users manual chapter on
1805    matrices.
1806 
1807 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1808 @*/
1809 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1810 {
1811   Mat_SeqBAIJ *b;
1812   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1813   PetscTruth  flg;
1814 
1815   PetscFunctionBegin;
1816   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr);
1817   if (!flg) PetscFunctionReturn(0);
1818 
1819   B->preallocated = PETSC_TRUE;
1820   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1821   if (nnz && newbs != bs) {
1822     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1823   }
1824   bs   = newbs;
1825 
1826   mbs  = B->m/bs;
1827   nbs  = B->n/bs;
1828   bs2  = bs*bs;
1829 
1830   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1831     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1832   }
1833 
1834   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1835   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1836   if (nnz) {
1837     for (i=0; i<mbs; i++) {
1838       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1839       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1840     }
1841   }
1842 
1843   b       = (Mat_SeqBAIJ*)B->data;
1844   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1845   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1846   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1847   if (!flg) {
1848     switch (bs) {
1849     case 1:
1850       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1851       B->ops->mult            = MatMult_SeqBAIJ_1;
1852       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1853       break;
1854     case 2:
1855       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1856       B->ops->mult            = MatMult_SeqBAIJ_2;
1857       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1858       break;
1859     case 3:
1860       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1861       B->ops->mult            = MatMult_SeqBAIJ_3;
1862       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1863       break;
1864     case 4:
1865       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1866       B->ops->mult            = MatMult_SeqBAIJ_4;
1867       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1868       break;
1869     case 5:
1870       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1871       B->ops->mult            = MatMult_SeqBAIJ_5;
1872       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1873       break;
1874     case 6:
1875       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1876       B->ops->mult            = MatMult_SeqBAIJ_6;
1877       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1878       break;
1879     case 7:
1880       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1881       B->ops->mult            = MatMult_SeqBAIJ_7;
1882       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1883       break;
1884     default:
1885       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1886       B->ops->mult            = MatMult_SeqBAIJ_N;
1887       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1888       break;
1889     }
1890   }
1891   b->bs      = bs;
1892   b->mbs     = mbs;
1893   b->nbs     = nbs;
1894   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1895   if (!nnz) {
1896     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1897     else if (nz <= 0)        nz = 1;
1898     for (i=0; i<mbs; i++) b->imax[i] = nz;
1899     nz = nz*mbs;
1900   } else {
1901     nz = 0;
1902     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1903   }
1904 
1905   /* allocate the matrix space */
1906   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1907   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1908   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1909   b->j  = (int*)(b->a + nz*bs2);
1910   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1911   b->i  = b->j + nz;
1912   b->singlemalloc = PETSC_TRUE;
1913 
1914   b->i[0] = 0;
1915   for (i=1; i<mbs+1; i++) {
1916     b->i[i] = b->i[i-1] + b->imax[i-1];
1917   }
1918 
1919   /* b->ilen will count nonzeros in each block row so far. */
1920   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1921   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1922   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1923 
1924   b->bs               = bs;
1925   b->bs2              = bs2;
1926   b->mbs              = mbs;
1927   b->nz               = 0;
1928   b->maxnz            = nz*bs2;
1929   B->info.nz_unneeded = (PetscReal)b->maxnz;
1930   PetscFunctionReturn(0);
1931 }
1932 
1933