xref: /petsc/src/mat/impls/baij/seq/baij.c (revision b64c01cbcd199c4180ee5f837fa6ef0250467912)
1 /*$Id: baij.c,v 1.236 2001/07/12 23:47:46 buschelm Exp buschelm $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "petscsys.h"                     /*I "petscmat.h" I*/
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 
12 /*  UGLY, ugly, ugly
13    When MatScalar == Scalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
14    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
15    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
16    converts the entries into single precision and then calls ..._MatScalar() to put them
17    into the single precision data structures.
18 */
19 #if defined(PETSC_USE_MAT_SINGLE)
20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
21 #else
22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
23 #endif
24 
25 #define CHUNKSIZE  10
26 
27 /*
28      Checks for missing diagonals
29 */
30 #undef __FUNCT__
31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
32 int MatMissingDiagonal_SeqBAIJ(Mat A)
33 {
34   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
35   int         *diag,*jj = a->j,i,ierr;
36 
37   PetscFunctionBegin;
38   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
39   diag = a->diag;
40   for (i=0; i<a->mbs; i++) {
41     if (jj[diag[i]] != i) {
42       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
43     }
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
50 int MatMarkDiagonal_SeqBAIJ(Mat A)
51 {
52   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
53   int         i,j,*diag,m = a->mbs,ierr;
54 
55   PetscFunctionBegin;
56   if (a->diag) PetscFunctionReturn(0);
57 
58   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
59   PetscLogObjectMemory(A,(m+1)*sizeof(int));
60   for (i=0; i<m; i++) {
61     diag[i] = a->i[i+1];
62     for (j=a->i[i]; j<a->i[i+1]; j++) {
63       if (a->j[j] == i) {
64         diag[i] = j;
65         break;
66       }
67     }
68   }
69   a->diag = diag;
70   PetscFunctionReturn(0);
71 }
72 
73 
74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
79 {
80   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
81   int         ierr,n = a->mbs,i;
82 
83   PetscFunctionBegin;
84   *nn = n;
85   if (!ia) PetscFunctionReturn(0);
86   if (symmetric) {
87     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
88   } else if (oshift == 1) {
89     /* temporarily add 1 to i and j indices */
90     int nz = a->i[n];
91     for (i=0; i<nz; i++) a->j[i]++;
92     for (i=0; i<n+1; i++) a->i[i]++;
93     *ia = a->i; *ja = a->j;
94   } else {
95     *ia = a->i; *ja = a->j;
96   }
97 
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
104 {
105   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
106   int         i,n = a->mbs,ierr;
107 
108   PetscFunctionBegin;
109   if (!ia) PetscFunctionReturn(0);
110   if (symmetric) {
111     ierr = PetscFree(*ia);CHKERRQ(ierr);
112     ierr = PetscFree(*ja);CHKERRQ(ierr);
113   } else if (oshift == 1) {
114     int nz = a->i[n]-1;
115     for (i=0; i<nz; i++) a->j[i]--;
116     for (i=0; i<n+1; i++) a->i[i]--;
117   }
118   PetscFunctionReturn(0);
119 }
120 
121 #undef __FUNCT__
122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
124 {
125   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
126 
127   PetscFunctionBegin;
128   *bs = baij->bs;
129   PetscFunctionReturn(0);
130 }
131 
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatDestroy_SeqBAIJ"
135 int MatDestroy_SeqBAIJ(Mat A)
136 {
137   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
138   int         ierr;
139 
140   PetscFunctionBegin;
141 #if defined(PETSC_USE_LOG)
142   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
143 #endif
144   ierr = PetscFree(a->a);CHKERRQ(ierr);
145   if (!a->singlemalloc) {
146     ierr = PetscFree(a->i);CHKERRQ(ierr);
147     ierr = PetscFree(a->j);CHKERRQ(ierr);
148   }
149   if (a->row) {
150     ierr = ISDestroy(a->row);CHKERRQ(ierr);
151   }
152   if (a->col) {
153     ierr = ISDestroy(a->col);CHKERRQ(ierr);
154   }
155   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
156   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
157   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
158   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
159   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
160   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
161   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
162 #if defined(PETSC_USE_MAT_SINGLE)
163   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
164 #endif
165   ierr = PetscFree(a);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatSetOption_SeqBAIJ"
171 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
172 {
173   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
174 
175   PetscFunctionBegin;
176   switch (op) {
177   case MAT_ROW_ORIENTED:
178     a->roworiented    = PETSC_TRUE;
179     break;
180   case MAT_COLUMN_ORIENTED:
181     a->roworiented    = PETSC_FALSE;
182     break;
183   case MAT_COLUMNS_SORTED:
184     a->sorted         = PETSC_TRUE;
185     break;
186   case MAT_COLUMNS_UNSORTED:
187     a->sorted         = PETSC_FALSE;
188     break;
189   case MAT_KEEP_ZEROED_ROWS:
190     a->keepzeroedrows = PETSC_TRUE;
191     break;
192   case MAT_NO_NEW_NONZERO_LOCATIONS:
193     a->nonew          = 1;
194     break;
195   case MAT_NEW_NONZERO_LOCATION_ERR:
196     a->nonew          = -1;
197     break;
198   case MAT_NEW_NONZERO_ALLOCATION_ERR:
199     a->nonew          = -2;
200     break;
201   case MAT_YES_NEW_NONZERO_LOCATIONS:
202     a->nonew          = 0;
203     break;
204   case MAT_ROWS_SORTED:
205   case MAT_ROWS_UNSORTED:
206   case MAT_YES_NEW_DIAGONALS:
207   case MAT_IGNORE_OFF_PROC_ENTRIES:
208   case MAT_USE_HASH_TABLE:
209     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
210     break;
211   case MAT_NO_NEW_DIAGONALS:
212     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
213     break;
214   case MAT_USE_SINGLE_PRECISION_SOLVES:
215     if (a->bs==4) {
216       PetscTruth sse_enabled=PETSC_FALSE,single_prec,flg;
217       int        ierr;
218       ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr);
219       ierr = PetscOptionsGetLogical(PETSC_NULL,"-mat_single_precision_solves",&single_prec,&flg);CHKERRQ(ierr);
220       if (sse_enabled && flg && single_prec) {
221           a->single_precision_solves = PETSC_TRUE;
222           A->ops->solve              = MatSolve_SeqBAIJ_Update;
223           A->ops->solvetranspose     = MatSolveTranspose_SeqBAIJ_Update;
224           break;
225       }
226       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
227     }
228     break;
229   default:
230     SETERRQ(PETSC_ERR_SUP,"unknown option");
231   }
232   PetscFunctionReturn(0);
233 }
234 
235 #undef __FUNCT__
236 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ"
237 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
238 {
239   PetscFunctionBegin;
240   if (m) *m = 0;
241   if (n) *n = A->m;
242   PetscFunctionReturn(0);
243 }
244 
245 #undef __FUNCT__
246 #define __FUNCT__ "MatGetRow_SeqBAIJ"
247 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
248 {
249   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
250   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
251   MatScalar    *aa,*aa_i;
252   Scalar       *v_i;
253 
254   PetscFunctionBegin;
255   bs  = a->bs;
256   ai  = a->i;
257   aj  = a->j;
258   aa  = a->a;
259   bs2 = a->bs2;
260 
261   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
262 
263   bn  = row/bs;   /* Block number */
264   bp  = row % bs; /* Block Position */
265   M   = ai[bn+1] - ai[bn];
266   *nz = bs*M;
267 
268   if (v) {
269     *v = 0;
270     if (*nz) {
271       ierr = PetscMalloc((*nz)*sizeof(Scalar),v);CHKERRQ(ierr);
272       for (i=0; i<M; i++) { /* for each block in the block row */
273         v_i  = *v + i*bs;
274         aa_i = aa + bs2*(ai[bn] + i);
275         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
276       }
277     }
278   }
279 
280   if (idx) {
281     *idx = 0;
282     if (*nz) {
283       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
284       for (i=0; i<M; i++) { /* for each block in the block row */
285         idx_i = *idx + i*bs;
286         itmp  = bs*aj[ai[bn] + i];
287         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
288       }
289     }
290   }
291   PetscFunctionReturn(0);
292 }
293 
294 #undef __FUNCT__
295 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
296 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
297 {
298   int ierr;
299 
300   PetscFunctionBegin;
301   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
302   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
303   PetscFunctionReturn(0);
304 }
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "MatTranspose_SeqBAIJ"
308 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
309 {
310   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
311   Mat         C;
312   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
313   int         *rows,*cols,bs2=a->bs2;
314   Scalar      *array;
315 
316   PetscFunctionBegin;
317   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
318   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
319   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
320 
321 #if defined(PETSC_USE_MAT_SINGLE)
322   ierr = PetscMalloc(a->bs2*a->nz*sizeof(Scalar),&array);CHKERRQ(ierr);
323   for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i];
324 #else
325   array = a->a;
326 #endif
327 
328   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
329   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
330   ierr = PetscFree(col);CHKERRQ(ierr);
331   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
332   cols = rows + bs;
333   for (i=0; i<mbs; i++) {
334     cols[0] = i*bs;
335     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
336     len = ai[i+1] - ai[i];
337     for (j=0; j<len; j++) {
338       rows[0] = (*aj++)*bs;
339       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
340       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
341       array += bs2;
342     }
343   }
344   ierr = PetscFree(rows);CHKERRQ(ierr);
345 #if defined(PETSC_USE_MAT_SINGLE)
346   ierr = PetscFree(array);
347 #endif
348 
349   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
350   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
351 
352   if (B) {
353     *B = C;
354   } else {
355     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
356   }
357   PetscFunctionReturn(0);
358 }
359 
360 #undef __FUNCT__
361 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
362 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
363 {
364   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
365   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
366   Scalar      *aa;
367   FILE        *file;
368 
369   PetscFunctionBegin;
370   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
371   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
372   col_lens[0] = MAT_COOKIE;
373 
374   col_lens[1] = A->m;
375   col_lens[2] = A->n;
376   col_lens[3] = a->nz*bs2;
377 
378   /* store lengths of each row and write (including header) to file */
379   count = 0;
380   for (i=0; i<a->mbs; i++) {
381     for (j=0; j<bs; j++) {
382       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
383     }
384   }
385   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
386   ierr = PetscFree(col_lens);CHKERRQ(ierr);
387 
388   /* store column indices (zero start index) */
389   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
390   count = 0;
391   for (i=0; i<a->mbs; i++) {
392     for (j=0; j<bs; j++) {
393       for (k=a->i[i]; k<a->i[i+1]; k++) {
394         for (l=0; l<bs; l++) {
395           jj[count++] = bs*a->j[k] + l;
396         }
397       }
398     }
399   }
400   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
401   ierr = PetscFree(jj);CHKERRQ(ierr);
402 
403   /* store nonzero values */
404   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr);
405   count = 0;
406   for (i=0; i<a->mbs; i++) {
407     for (j=0; j<bs; j++) {
408       for (k=a->i[i]; k<a->i[i+1]; k++) {
409         for (l=0; l<bs; l++) {
410           aa[count++] = a->a[bs2*k + l*bs + j];
411         }
412       }
413     }
414   }
415   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
416   ierr = PetscFree(aa);CHKERRQ(ierr);
417 
418   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
419   if (file) {
420     fprintf(file,"-matload_block_size %d\n",a->bs);
421   }
422   PetscFunctionReturn(0);
423 }
424 
425 #undef __FUNCT__
426 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
427 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
428 {
429   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
430   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
431   PetscViewerFormat format;
432 
433   PetscFunctionBegin;
434   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
435   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
436     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
437   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
438     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
439   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
440     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
441     for (i=0; i<a->mbs; i++) {
442       for (j=0; j<bs; j++) {
443         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
444         for (k=a->i[i]; k<a->i[i+1]; k++) {
445           for (l=0; l<bs; l++) {
446 #if defined(PETSC_USE_COMPLEX)
447             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
448               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l,
449                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
450             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
451               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l,
452                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
453             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
454               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
455             }
456 #else
457             if (a->a[bs2*k + l*bs + j] != 0.0) {
458               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
459             }
460 #endif
461           }
462         }
463         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
464       }
465     }
466     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
467   } else {
468     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
469     for (i=0; i<a->mbs; i++) {
470       for (j=0; j<bs; j++) {
471         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
472         for (k=a->i[i]; k<a->i[i+1]; k++) {
473           for (l=0; l<bs; l++) {
474 #if defined(PETSC_USE_COMPLEX)
475             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
476               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
477                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
478             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
479               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
480                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
481             } else {
482               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
483             }
484 #else
485             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
486 #endif
487           }
488         }
489         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
490       }
491     }
492     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
493   }
494   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
495   PetscFunctionReturn(0);
496 }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
500 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
501 {
502   Mat          A = (Mat) Aa;
503   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
504   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
505   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
506   MatScalar    *aa;
507   PetscViewer  viewer;
508 
509   PetscFunctionBegin;
510 
511   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
512   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
513 
514   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
515 
516   /* loop over matrix elements drawing boxes */
517   color = PETSC_DRAW_BLUE;
518   for (i=0,row=0; i<mbs; i++,row+=bs) {
519     for (j=a->i[i]; j<a->i[i+1]; j++) {
520       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
521       x_l = a->j[j]*bs; x_r = x_l + 1.0;
522       aa = a->a + j*bs2;
523       for (k=0; k<bs; k++) {
524         for (l=0; l<bs; l++) {
525           if (PetscRealPart(*aa++) >=  0.) continue;
526           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
527         }
528       }
529     }
530   }
531   color = PETSC_DRAW_CYAN;
532   for (i=0,row=0; i<mbs; i++,row+=bs) {
533     for (j=a->i[i]; j<a->i[i+1]; j++) {
534       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
535       x_l = a->j[j]*bs; x_r = x_l + 1.0;
536       aa = a->a + j*bs2;
537       for (k=0; k<bs; k++) {
538         for (l=0; l<bs; l++) {
539           if (PetscRealPart(*aa++) != 0.) continue;
540           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
541         }
542       }
543     }
544   }
545 
546   color = PETSC_DRAW_RED;
547   for (i=0,row=0; i<mbs; i++,row+=bs) {
548     for (j=a->i[i]; j<a->i[i+1]; j++) {
549       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
550       x_l = a->j[j]*bs; x_r = x_l + 1.0;
551       aa = a->a + j*bs2;
552       for (k=0; k<bs; k++) {
553         for (l=0; l<bs; l++) {
554           if (PetscRealPart(*aa++) <= 0.) continue;
555           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
556         }
557       }
558     }
559   }
560   PetscFunctionReturn(0);
561 }
562 
563 #undef __FUNCT__
564 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
565 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
566 {
567   int          ierr;
568   PetscReal    xl,yl,xr,yr,w,h;
569   PetscDraw    draw;
570   PetscTruth   isnull;
571 
572   PetscFunctionBegin;
573 
574   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
575   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
576 
577   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
578   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
579   xr += w;    yr += h;  xl = -w;     yl = -h;
580   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
581   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
582   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
583   PetscFunctionReturn(0);
584 }
585 
586 #undef __FUNCT__
587 #define __FUNCT__ "MatView_SeqBAIJ"
588 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
589 {
590   int        ierr;
591   PetscTruth issocket,isascii,isbinary,isdraw;
592 
593   PetscFunctionBegin;
594   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
595   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
596   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
597   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
598   if (issocket) {
599     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
600   } else if (isascii){
601     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
602   } else if (isbinary) {
603     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
604   } else if (isdraw) {
605     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
606   } else {
607     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
608   }
609   PetscFunctionReturn(0);
610 }
611 
612 
613 #undef __FUNCT__
614 #define __FUNCT__ "MatGetValues_SeqBAIJ"
615 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
616 {
617   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
618   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
619   int        *ai = a->i,*ailen = a->ilen;
620   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
621   MatScalar  *ap,*aa = a->a,zero = 0.0;
622 
623   PetscFunctionBegin;
624   for (k=0; k<m; k++) { /* loop over rows */
625     row  = im[k]; brow = row/bs;
626     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
627     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
628     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
629     nrow = ailen[brow];
630     for (l=0; l<n; l++) { /* loop over columns */
631       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
632       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
633       col  = in[l] ;
634       bcol = col/bs;
635       cidx = col%bs;
636       ridx = row%bs;
637       high = nrow;
638       low  = 0; /* assume unsorted */
639       while (high-low > 5) {
640         t = (low+high)/2;
641         if (rp[t] > bcol) high = t;
642         else             low  = t;
643       }
644       for (i=low; i<high; i++) {
645         if (rp[i] > bcol) break;
646         if (rp[i] == bcol) {
647           *v++ = ap[bs2*i+bs*cidx+ridx];
648           goto finished;
649         }
650       }
651       *v++ = zero;
652       finished:;
653     }
654   }
655   PetscFunctionReturn(0);
656 }
657 
658 #if defined(PETSC_USE_MAT_SINGLE)
659 #undef __FUNCT__
660 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
661 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
662 {
663   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
664   int         ierr,i,N = m*n*b->bs2;
665   MatScalar   *vsingle;
666 
667   PetscFunctionBegin;
668   if (N > b->setvalueslen) {
669     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
670     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
671     b->setvalueslen  = N;
672   }
673   vsingle = b->setvaluescopy;
674   for (i=0; i<N; i++) {
675     vsingle[i] = v[i];
676   }
677   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
678   PetscFunctionReturn(0);
679 }
680 #endif
681 
682 
683 #undef __FUNCT__
684 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
685 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
686 {
687   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
688   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
689   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
690   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
691   PetscTruth  roworiented=a->roworiented;
692   MatScalar   *value = v,*ap,*aa = a->a,*bap;
693 
694   PetscFunctionBegin;
695   if (roworiented) {
696     stepval = (n-1)*bs;
697   } else {
698     stepval = (m-1)*bs;
699   }
700   for (k=0; k<m; k++) { /* loop over added rows */
701     row  = im[k];
702     if (row < 0) continue;
703 #if defined(PETSC_USE_BOPT_g)
704     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
705 #endif
706     rp   = aj + ai[row];
707     ap   = aa + bs2*ai[row];
708     rmax = imax[row];
709     nrow = ailen[row];
710     low  = 0;
711     for (l=0; l<n; l++) { /* loop over added columns */
712       if (in[l] < 0) continue;
713 #if defined(PETSC_USE_BOPT_g)
714       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
715 #endif
716       col = in[l];
717       if (roworiented) {
718         value = v + k*(stepval+bs)*bs + l*bs;
719       } else {
720         value = v + l*(stepval+bs)*bs + k*bs;
721       }
722       if (!sorted) low = 0; high = nrow;
723       while (high-low > 7) {
724         t = (low+high)/2;
725         if (rp[t] > col) high = t;
726         else             low  = t;
727       }
728       for (i=low; i<high; i++) {
729         if (rp[i] > col) break;
730         if (rp[i] == col) {
731           bap  = ap +  bs2*i;
732           if (roworiented) {
733             if (is == ADD_VALUES) {
734               for (ii=0; ii<bs; ii++,value+=stepval) {
735                 for (jj=ii; jj<bs2; jj+=bs) {
736                   bap[jj] += *value++;
737                 }
738               }
739             } else {
740               for (ii=0; ii<bs; ii++,value+=stepval) {
741                 for (jj=ii; jj<bs2; jj+=bs) {
742                   bap[jj] = *value++;
743                 }
744               }
745             }
746           } else {
747             if (is == ADD_VALUES) {
748               for (ii=0; ii<bs; ii++,value+=stepval) {
749                 for (jj=0; jj<bs; jj++) {
750                   *bap++ += *value++;
751                 }
752               }
753             } else {
754               for (ii=0; ii<bs; ii++,value+=stepval) {
755                 for (jj=0; jj<bs; jj++) {
756                   *bap++  = *value++;
757                 }
758               }
759             }
760           }
761           goto noinsert2;
762         }
763       }
764       if (nonew == 1) goto noinsert2;
765       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
766       if (nrow >= rmax) {
767         /* there is no extra room in row, therefore enlarge */
768         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
769         MatScalar *new_a;
770 
771         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
772 
773         /* malloc new storage space */
774         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
775 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
776         new_j   = (int*)(new_a + bs2*new_nz);
777         new_i   = new_j + new_nz;
778 
779         /* copy over old data into new slots */
780         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
781         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
782         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
783         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
784         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
785         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
786         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
787         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
788         /* free up old matrix storage */
789         ierr = PetscFree(a->a);CHKERRQ(ierr);
790         if (!a->singlemalloc) {
791           ierr = PetscFree(a->i);CHKERRQ(ierr);
792           ierr = PetscFree(a->j);CHKERRQ(ierr);
793         }
794         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
795         a->singlemalloc = PETSC_TRUE;
796 
797         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
798         rmax = imax[row] = imax[row] + CHUNKSIZE;
799         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
800         a->maxnz += bs2*CHUNKSIZE;
801         a->reallocs++;
802         a->nz++;
803       }
804       N = nrow++ - 1;
805       /* shift up all the later entries in this row */
806       for (ii=N; ii>=i; ii--) {
807         rp[ii+1] = rp[ii];
808         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
809       }
810       if (N >= i) {
811         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
812       }
813       rp[i] = col;
814       bap   = ap +  bs2*i;
815       if (roworiented) {
816         for (ii=0; ii<bs; ii++,value+=stepval) {
817           for (jj=ii; jj<bs2; jj+=bs) {
818             bap[jj] = *value++;
819           }
820         }
821       } else {
822         for (ii=0; ii<bs; ii++,value+=stepval) {
823           for (jj=0; jj<bs; jj++) {
824             *bap++  = *value++;
825           }
826         }
827       }
828       noinsert2:;
829       low = i;
830     }
831     ailen[row] = nrow;
832   }
833   PetscFunctionReturn(0);
834 }
835 
836 
837 #undef __FUNCT__
838 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
839 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
840 {
841   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
842   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
843   int        m = A->m,*ip,N,*ailen = a->ilen;
844   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
845   MatScalar  *aa = a->a,*ap;
846 
847   PetscFunctionBegin;
848   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
849 
850   if (m) rmax = ailen[0];
851   for (i=1; i<mbs; i++) {
852     /* move each row back by the amount of empty slots (fshift) before it*/
853     fshift += imax[i-1] - ailen[i-1];
854     rmax   = PetscMax(rmax,ailen[i]);
855     if (fshift) {
856       ip = aj + ai[i]; ap = aa + bs2*ai[i];
857       N = ailen[i];
858       for (j=0; j<N; j++) {
859         ip[j-fshift] = ip[j];
860         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
861       }
862     }
863     ai[i] = ai[i-1] + ailen[i-1];
864   }
865   if (mbs) {
866     fshift += imax[mbs-1] - ailen[mbs-1];
867     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
868   }
869   /* reset ilen and imax for each row */
870   for (i=0; i<mbs; i++) {
871     ailen[i] = imax[i] = ai[i+1] - ai[i];
872   }
873   a->nz = ai[mbs];
874 
875   /* diagonals may have moved, so kill the diagonal pointers */
876   if (fshift && a->diag) {
877     ierr = PetscFree(a->diag);CHKERRQ(ierr);
878     PetscLogObjectMemory(A,-(m+1)*sizeof(int));
879     a->diag = 0;
880   }
881   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
882            m,A->n,a->bs,fshift*bs2,a->nz*bs2);
883   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
884            a->reallocs);
885   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
886   a->reallocs          = 0;
887   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
888 
889   PetscFunctionReturn(0);
890 }
891 
892 
893 
894 /*
895    This function returns an array of flags which indicate the locations of contiguous
896    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
897    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
898    Assume: sizes should be long enough to hold all the values.
899 */
900 #undef __FUNCT__
901 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
902 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
903 {
904   int        i,j,k,row;
905   PetscTruth flg;
906 
907   PetscFunctionBegin;
908   for (i=0,j=0; i<n; j++) {
909     row = idx[i];
910     if (row%bs!=0) { /* Not the begining of a block */
911       sizes[j] = 1;
912       i++;
913     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
914       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
915       i++;
916     } else { /* Begining of the block, so check if the complete block exists */
917       flg = PETSC_TRUE;
918       for (k=1; k<bs; k++) {
919         if (row+k != idx[i+k]) { /* break in the block */
920           flg = PETSC_FALSE;
921           break;
922         }
923       }
924       if (flg == PETSC_TRUE) { /* No break in the bs */
925         sizes[j] = bs;
926         i+= bs;
927       } else {
928         sizes[j] = 1;
929         i++;
930       }
931     }
932   }
933   *bs_max = j;
934   PetscFunctionReturn(0);
935 }
936 
937 #undef __FUNCT__
938 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
939 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag)
940 {
941   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
942   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
943   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
944   Scalar      zero = 0.0;
945   MatScalar   *aa;
946 
947   PetscFunctionBegin;
948   /* Make a copy of the IS and  sort it */
949   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
950   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
951 
952   /* allocate memory for rows,sizes */
953   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
954   sizes = rows + is_n;
955 
956   /* copy IS values to rows, and sort them */
957   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
958   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
959   if (baij->keepzeroedrows) {
960     for (i=0; i<is_n; i++) { sizes[i] = 1; }
961     bs_max = is_n;
962   } else {
963     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
964   }
965   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
966 
967   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
968     row   = rows[j];
969     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
970     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
971     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
972     if (sizes[i] == bs && !baij->keepzeroedrows) {
973       if (diag) {
974         if (baij->ilen[row/bs] > 0) {
975           baij->ilen[row/bs]       = 1;
976           baij->j[baij->i[row/bs]] = row/bs;
977           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
978         }
979         /* Now insert all the diagonal values for this bs */
980         for (k=0; k<bs; k++) {
981           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
982         }
983       } else { /* (!diag) */
984         baij->ilen[row/bs] = 0;
985       } /* end (!diag) */
986     } else { /* (sizes[i] != bs) */
987 #if defined (PETSC_USE_DEBUG)
988       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
989 #endif
990       for (k=0; k<count; k++) {
991         aa[0] =  zero;
992         aa    += bs;
993       }
994       if (diag) {
995         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
996       }
997     }
998   }
999 
1000   ierr = PetscFree(rows);CHKERRQ(ierr);
1001   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1007 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
1008 {
1009   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1010   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1011   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1012   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1013   int         ridx,cidx,bs2=a->bs2,ierr;
1014   PetscTruth  roworiented=a->roworiented;
1015   MatScalar   *ap,value,*aa=a->a,*bap;
1016 
1017   PetscFunctionBegin;
1018   for (k=0; k<m; k++) { /* loop over added rows */
1019     row  = im[k]; brow = row/bs;
1020     if (row < 0) continue;
1021 #if defined(PETSC_USE_BOPT_g)
1022     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1023 #endif
1024     rp   = aj + ai[brow];
1025     ap   = aa + bs2*ai[brow];
1026     rmax = imax[brow];
1027     nrow = ailen[brow];
1028     low  = 0;
1029     for (l=0; l<n; l++) { /* loop over added columns */
1030       if (in[l] < 0) continue;
1031 #if defined(PETSC_USE_BOPT_g)
1032       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1033 #endif
1034       col = in[l]; bcol = col/bs;
1035       ridx = row % bs; cidx = col % bs;
1036       if (roworiented) {
1037         value = v[l + k*n];
1038       } else {
1039         value = v[k + l*m];
1040       }
1041       if (!sorted) low = 0; high = nrow;
1042       while (high-low > 7) {
1043         t = (low+high)/2;
1044         if (rp[t] > bcol) high = t;
1045         else              low  = t;
1046       }
1047       for (i=low; i<high; i++) {
1048         if (rp[i] > bcol) break;
1049         if (rp[i] == bcol) {
1050           bap  = ap +  bs2*i + bs*cidx + ridx;
1051           if (is == ADD_VALUES) *bap += value;
1052           else                  *bap  = value;
1053           goto noinsert1;
1054         }
1055       }
1056       if (nonew == 1) goto noinsert1;
1057       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1058       if (nrow >= rmax) {
1059         /* there is no extra room in row, therefore enlarge */
1060         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1061         MatScalar *new_a;
1062 
1063         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1064 
1065         /* Malloc new storage space */
1066         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1067 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1068         new_j   = (int*)(new_a + bs2*new_nz);
1069         new_i   = new_j + new_nz;
1070 
1071         /* copy over old data into new slots */
1072         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1073         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1074         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1075         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1076         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1077         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1078         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1079         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1080         /* free up old matrix storage */
1081         ierr = PetscFree(a->a);CHKERRQ(ierr);
1082         if (!a->singlemalloc) {
1083           ierr = PetscFree(a->i);CHKERRQ(ierr);
1084           ierr = PetscFree(a->j);CHKERRQ(ierr);
1085         }
1086         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1087         a->singlemalloc = PETSC_TRUE;
1088 
1089         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1090         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1091         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1092         a->maxnz += bs2*CHUNKSIZE;
1093         a->reallocs++;
1094         a->nz++;
1095       }
1096       N = nrow++ - 1;
1097       /* shift up all the later entries in this row */
1098       for (ii=N; ii>=i; ii--) {
1099         rp[ii+1] = rp[ii];
1100         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1101       }
1102       if (N>=i) {
1103         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1104       }
1105       rp[i]                      = bcol;
1106       ap[bs2*i + bs*cidx + ridx] = value;
1107       noinsert1:;
1108       low = i;
1109     }
1110     ailen[brow] = nrow;
1111   }
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1118 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1119 {
1120   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1121   Mat         outA;
1122   int         ierr;
1123   PetscTruth  row_identity,col_identity;
1124 
1125   PetscFunctionBegin;
1126   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1127   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1128   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1129   if (!row_identity || !col_identity) {
1130     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1131   }
1132 
1133   outA          = inA;
1134   inA->factor   = FACTOR_LU;
1135 
1136   if (!a->diag) {
1137     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1138   }
1139   /*
1140       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1141       for ILU(0) factorization with natural ordering
1142   */
1143   if (a->bs < 8) {
1144     ierr = MatSeqBAIJ_UpdateFactor_NaturalOrdering(inA);CHKERRQ(ierr);
1145   } else {
1146     a->row        = row;
1147     a->col        = col;
1148     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1149     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1150 
1151     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1152     ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1153     PetscLogObjectParent(inA,a->icol);
1154 
1155     if (!a->solve_work) {
1156       ierr = PetscMalloc((inA->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr);
1157       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(Scalar));
1158     }
1159   }
1160 
1161   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1162 
1163   PetscFunctionReturn(0);
1164 }
1165 #undef __FUNCT__
1166 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1167 int MatPrintHelp_SeqBAIJ(Mat A)
1168 {
1169   static PetscTruth called = PETSC_FALSE;
1170   MPI_Comm          comm = A->comm;
1171   int               ierr;
1172 
1173   PetscFunctionBegin;
1174   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1175   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1176   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 EXTERN_C_BEGIN
1181 #undef __FUNCT__
1182 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1183 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1184 {
1185   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1186   int         i,nz,n;
1187 
1188   PetscFunctionBegin;
1189   nz = baij->maxnz;
1190   n  = mat->n;
1191   for (i=0; i<nz; i++) {
1192     baij->j[i] = indices[i];
1193   }
1194   baij->nz = nz;
1195   for (i=0; i<n; i++) {
1196     baij->ilen[i] = baij->imax[i];
1197   }
1198 
1199   PetscFunctionReturn(0);
1200 }
1201 EXTERN_C_END
1202 
1203 #undef __FUNCT__
1204 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1205 /*@
1206     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1207        in the matrix.
1208 
1209   Input Parameters:
1210 +  mat - the SeqBAIJ matrix
1211 -  indices - the column indices
1212 
1213   Level: advanced
1214 
1215   Notes:
1216     This can be called if you have precomputed the nonzero structure of the
1217   matrix and want to provide it to the matrix object to improve the performance
1218   of the MatSetValues() operation.
1219 
1220     You MUST have set the correct numbers of nonzeros per row in the call to
1221   MatCreateSeqBAIJ().
1222 
1223     MUST be called before any calls to MatSetValues();
1224 
1225 @*/
1226 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1227 {
1228   int ierr,(*f)(Mat,int *);
1229 
1230   PetscFunctionBegin;
1231   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1232   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr);
1233   if (f) {
1234     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1235   } else {
1236     SETERRQ(1,"Wrong type of matrix to set column indices");
1237   }
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1243 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1244 {
1245   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1246   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1247   PetscReal    atmp;
1248   Scalar       *x,zero = 0.0;
1249   MatScalar    *aa;
1250   int          ncols,brow,krow,kcol;
1251 
1252   PetscFunctionBegin;
1253   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1254   bs   = a->bs;
1255   aa   = a->a;
1256   ai   = a->i;
1257   aj   = a->j;
1258   mbs = a->mbs;
1259 
1260   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1261   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1262   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1263   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1264   for (i=0; i<mbs; i++) {
1265     ncols = ai[1] - ai[0]; ai++;
1266     brow  = bs*i;
1267     for (j=0; j<ncols; j++){
1268       /* bcol = bs*(*aj); */
1269       for (kcol=0; kcol<bs; kcol++){
1270         for (krow=0; krow<bs; krow++){
1271           atmp = PetscAbsScalar(*aa); aa++;
1272           row = brow + krow;    /* row index */
1273           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1274           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1275         }
1276       }
1277       aj++;
1278     }
1279   }
1280   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 #undef __FUNCT__
1285 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1286 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1287 {
1288   int        ierr;
1289 
1290   PetscFunctionBegin;
1291   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 #undef __FUNCT__
1296 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1297 int MatGetArray_SeqBAIJ(Mat A,Scalar **array)
1298 {
1299   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1300   PetscFunctionBegin;
1301   *array = a->a;
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1307 int MatRestoreArray_SeqBAIJ(Mat A,Scalar **array)
1308 {
1309   PetscFunctionBegin;
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 /* -------------------------------------------------------------------*/
1314 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1315        MatGetRow_SeqBAIJ,
1316        MatRestoreRow_SeqBAIJ,
1317        MatMult_SeqBAIJ_N,
1318        MatMultAdd_SeqBAIJ_N,
1319        MatMultTranspose_SeqBAIJ,
1320        MatMultTransposeAdd_SeqBAIJ,
1321        MatSolve_SeqBAIJ_N,
1322        0,
1323        0,
1324        0,
1325        MatLUFactor_SeqBAIJ,
1326        0,
1327        0,
1328        MatTranspose_SeqBAIJ,
1329        MatGetInfo_SeqBAIJ,
1330        MatEqual_SeqBAIJ,
1331        MatGetDiagonal_SeqBAIJ,
1332        MatDiagonalScale_SeqBAIJ,
1333        MatNorm_SeqBAIJ,
1334        0,
1335        MatAssemblyEnd_SeqBAIJ,
1336        0,
1337        MatSetOption_SeqBAIJ,
1338        MatZeroEntries_SeqBAIJ,
1339        MatZeroRows_SeqBAIJ,
1340        MatLUFactorSymbolic_SeqBAIJ,
1341        MatLUFactorNumeric_SeqBAIJ_N,
1342        0,
1343        0,
1344        MatSetUpPreallocation_SeqBAIJ,
1345        0,
1346        MatGetOwnershipRange_SeqBAIJ,
1347        MatILUFactorSymbolic_SeqBAIJ,
1348        0,
1349        MatGetArray_SeqBAIJ,
1350        MatRestoreArray_SeqBAIJ,
1351        MatDuplicate_SeqBAIJ,
1352        0,
1353        0,
1354        MatILUFactor_SeqBAIJ,
1355        0,
1356        0,
1357        MatGetSubMatrices_SeqBAIJ,
1358        MatIncreaseOverlap_SeqBAIJ,
1359        MatGetValues_SeqBAIJ,
1360        0,
1361        MatPrintHelp_SeqBAIJ,
1362        MatScale_SeqBAIJ,
1363        0,
1364        0,
1365        0,
1366        MatGetBlockSize_SeqBAIJ,
1367        MatGetRowIJ_SeqBAIJ,
1368        MatRestoreRowIJ_SeqBAIJ,
1369        0,
1370        0,
1371        0,
1372        0,
1373        0,
1374        0,
1375        MatSetValuesBlocked_SeqBAIJ,
1376        MatGetSubMatrix_SeqBAIJ,
1377        MatDestroy_SeqBAIJ,
1378        MatView_SeqBAIJ,
1379        MatGetMaps_Petsc,
1380        0,
1381        0,
1382        0,
1383        0,
1384        0,
1385        0,
1386        MatGetRowMax_SeqBAIJ,
1387        MatConvert_Basic};
1388 
1389 EXTERN_C_BEGIN
1390 #undef __FUNCT__
1391 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1392 int MatStoreValues_SeqBAIJ(Mat mat)
1393 {
1394   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1395   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1396   int         ierr;
1397 
1398   PetscFunctionBegin;
1399   if (aij->nonew != 1) {
1400     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1401   }
1402 
1403   /* allocate space for values if not already there */
1404   if (!aij->saved_values) {
1405     ierr = PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr);
1406   }
1407 
1408   /* copy values over */
1409   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1410   PetscFunctionReturn(0);
1411 }
1412 EXTERN_C_END
1413 
1414 EXTERN_C_BEGIN
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1417 int MatRetrieveValues_SeqBAIJ(Mat mat)
1418 {
1419   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1420   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1421 
1422   PetscFunctionBegin;
1423   if (aij->nonew != 1) {
1424     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1425   }
1426   if (!aij->saved_values) {
1427     SETERRQ(1,"Must call MatStoreValues(A);first");
1428   }
1429 
1430   /* copy values over */
1431   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1432   PetscFunctionReturn(0);
1433 }
1434 EXTERN_C_END
1435 
1436 EXTERN_C_BEGIN
1437 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1438 EXTERN_C_END
1439 
1440 EXTERN_C_BEGIN
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatCreate_SeqBAIJ"
1443 int MatCreate_SeqBAIJ(Mat B)
1444 {
1445   int         ierr,size;
1446   Mat_SeqBAIJ *b;
1447 
1448   PetscFunctionBegin;
1449   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1450   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1451 
1452   B->m = B->M = PetscMax(B->m,B->M);
1453   B->n = B->N = PetscMax(B->n,B->N);
1454   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1455   B->data = (void*)b;
1456   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1457   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1458   B->factor           = 0;
1459   B->lupivotthreshold = 1.0;
1460   B->mapping          = 0;
1461   b->row              = 0;
1462   b->col              = 0;
1463   b->icol             = 0;
1464   b->reallocs         = 0;
1465   b->saved_values     = 0;
1466 #if defined(PETSC_USE_MAT_SINGLE)
1467   b->setvalueslen     = 0;
1468   b->setvaluescopy    = PETSC_NULL;
1469 #endif
1470   b->single_precision_solves = PETSC_FALSE;
1471 
1472   ierr = MapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1473   ierr = MapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1474 
1475   b->sorted           = PETSC_FALSE;
1476   b->roworiented      = PETSC_TRUE;
1477   b->nonew            = 0;
1478   b->diag             = 0;
1479   b->solve_work       = 0;
1480   b->mult_work        = 0;
1481   b->spptr            = 0;
1482   B->info.nz_unneeded = (PetscReal)b->maxnz;
1483   b->keepzeroedrows   = PETSC_FALSE;
1484 
1485   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1486                                      "MatStoreValues_SeqBAIJ",
1487                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1488   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1489                                      "MatRetrieveValues_SeqBAIJ",
1490                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1491   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1492                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1493                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1494   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1495                                      "MatConvert_SeqBAIJ_SeqAIJ",
1496                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 EXTERN_C_END
1500 
1501 #undef __FUNCT__
1502 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1503 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1504 {
1505   Mat         C;
1506   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1507   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1508 
1509   PetscFunctionBegin;
1510   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1511 
1512   *B = 0;
1513   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1514   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1515   c    = (Mat_SeqBAIJ*)C->data;
1516 
1517   c->bs         = a->bs;
1518   c->bs2        = a->bs2;
1519   c->mbs        = a->mbs;
1520   c->nbs        = a->nbs;
1521   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1522 
1523   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1524   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1525   for (i=0; i<mbs; i++) {
1526     c->imax[i] = a->imax[i];
1527     c->ilen[i] = a->ilen[i];
1528   }
1529 
1530   /* allocate the matrix space */
1531   c->singlemalloc = PETSC_TRUE;
1532   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1533   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1534   c->j = (int*)(c->a + nz*bs2);
1535   c->i = c->j + nz;
1536   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1537   if (mbs > 0) {
1538     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1539     if (cpvalues == MAT_COPY_VALUES) {
1540       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1541     } else {
1542       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1543     }
1544   }
1545 
1546   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1547   c->sorted      = a->sorted;
1548   c->roworiented = a->roworiented;
1549   c->nonew       = a->nonew;
1550 
1551   if (a->diag) {
1552     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1553     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1554     for (i=0; i<mbs; i++) {
1555       c->diag[i] = a->diag[i];
1556     }
1557   } else c->diag        = 0;
1558   c->nz                 = a->nz;
1559   c->maxnz              = a->maxnz;
1560   c->solve_work         = 0;
1561   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1562   c->mult_work          = 0;
1563   C->preallocated       = PETSC_TRUE;
1564   C->assembled          = PETSC_TRUE;
1565   *B = C;
1566   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 EXTERN_C_BEGIN
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatLoad_SeqBAIJ"
1573 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1574 {
1575   Mat_SeqBAIJ  *a;
1576   Mat          B;
1577   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1578   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1579   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1580   int          *masked,nmask,tmp,bs2,ishift;
1581   Scalar       *aa;
1582   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1583 
1584   PetscFunctionBegin;
1585   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1586   bs2  = bs*bs;
1587 
1588   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1589   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1590   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1591   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1592   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1593   M = header[1]; N = header[2]; nz = header[3];
1594 
1595   if (header[3] < 0) {
1596     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1597   }
1598 
1599   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1600 
1601   /*
1602      This code adds extra rows to make sure the number of rows is
1603     divisible by the blocksize
1604   */
1605   mbs        = M/bs;
1606   extra_rows = bs - M + bs*(mbs);
1607   if (extra_rows == bs) extra_rows = 0;
1608   else                  mbs++;
1609   if (extra_rows) {
1610     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1611   }
1612 
1613   /* read in row lengths */
1614   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1615   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1616   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1617 
1618   /* read in column indices */
1619   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1620   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1621   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1622 
1623   /* loop over row lengths determining block row lengths */
1624   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1625   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1626   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1627   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1628   masked   = mask + mbs;
1629   rowcount = 0; nzcount = 0;
1630   for (i=0; i<mbs; i++) {
1631     nmask = 0;
1632     for (j=0; j<bs; j++) {
1633       kmax = rowlengths[rowcount];
1634       for (k=0; k<kmax; k++) {
1635         tmp = jj[nzcount++]/bs;
1636         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1637       }
1638       rowcount++;
1639     }
1640     browlengths[i] += nmask;
1641     /* zero out the mask elements we set */
1642     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1643   }
1644 
1645   /* create our matrix */
1646   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1647   B = *A;
1648   a = (Mat_SeqBAIJ*)B->data;
1649 
1650   /* set matrix "i" values */
1651   a->i[0] = 0;
1652   for (i=1; i<= mbs; i++) {
1653     a->i[i]      = a->i[i-1] + browlengths[i-1];
1654     a->ilen[i-1] = browlengths[i-1];
1655   }
1656   a->nz         = 0;
1657   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1658 
1659   /* read in nonzero values */
1660   ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr);
1661   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1662   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1663 
1664   /* set "a" and "j" values into matrix */
1665   nzcount = 0; jcount = 0;
1666   for (i=0; i<mbs; i++) {
1667     nzcountb = nzcount;
1668     nmask    = 0;
1669     for (j=0; j<bs; j++) {
1670       kmax = rowlengths[i*bs+j];
1671       for (k=0; k<kmax; k++) {
1672         tmp = jj[nzcount++]/bs;
1673 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1674       }
1675     }
1676     /* sort the masked values */
1677     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1678 
1679     /* set "j" values into matrix */
1680     maskcount = 1;
1681     for (j=0; j<nmask; j++) {
1682       a->j[jcount++]  = masked[j];
1683       mask[masked[j]] = maskcount++;
1684     }
1685     /* set "a" values into matrix */
1686     ishift = bs2*a->i[i];
1687     for (j=0; j<bs; j++) {
1688       kmax = rowlengths[i*bs+j];
1689       for (k=0; k<kmax; k++) {
1690         tmp       = jj[nzcountb]/bs ;
1691         block     = mask[tmp] - 1;
1692         point     = jj[nzcountb] - bs*tmp;
1693         idx       = ishift + bs2*block + j + bs*point;
1694         a->a[idx] = (MatScalar)aa[nzcountb++];
1695       }
1696     }
1697     /* zero out the mask elements we set */
1698     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1699   }
1700   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1701 
1702   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1703   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1704   ierr = PetscFree(aa);CHKERRQ(ierr);
1705   ierr = PetscFree(jj);CHKERRQ(ierr);
1706   ierr = PetscFree(mask);CHKERRQ(ierr);
1707 
1708   B->assembled = PETSC_TRUE;
1709 
1710   ierr = MatView_Private(B);CHKERRQ(ierr);
1711   PetscFunctionReturn(0);
1712 }
1713 EXTERN_C_END
1714 
1715 #undef __FUNCT__
1716 #define __FUNCT__ "MatCreateSeqBAIJ"
1717 /*@C
1718    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1719    compressed row) format.  For good matrix assembly performance the
1720    user should preallocate the matrix storage by setting the parameter nz
1721    (or the array nnz).  By setting these parameters accurately, performance
1722    during matrix assembly can be increased by more than a factor of 50.
1723 
1724    Collective on MPI_Comm
1725 
1726    Input Parameters:
1727 +  comm - MPI communicator, set to PETSC_COMM_SELF
1728 .  bs - size of block
1729 .  m - number of rows
1730 .  n - number of columns
1731 .  nz - number of nonzero blocks  per block row (same for all rows)
1732 -  nnz - array containing the number of nonzero blocks in the various block rows
1733          (possibly different for each block row) or PETSC_NULL
1734 
1735    Output Parameter:
1736 .  A - the matrix
1737 
1738    Options Database Keys:
1739 .   -mat_no_unroll - uses code that does not unroll the loops in the
1740                      block calculations (much slower)
1741 .    -mat_block_size - size of the blocks to use
1742 
1743    Level: intermediate
1744 
1745    Notes:
1746    A nonzero block is any block that as 1 or more nonzeros in it
1747 
1748    The block AIJ format is fully compatible with standard Fortran 77
1749    storage.  That is, the stored row and column indices can begin at
1750    either one (as in Fortran) or zero.  See the users' manual for details.
1751 
1752    Specify the preallocated storage with either nz or nnz (not both).
1753    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1754    allocation.  For additional details, see the users manual chapter on
1755    matrices.
1756 
1757 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1758 @*/
1759 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1760 {
1761   int ierr;
1762 
1763   PetscFunctionBegin;
1764   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1765   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
1766   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1767   PetscFunctionReturn(0);
1768 }
1769 
1770 #undef __FUNCT__
1771 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
1772 /*@C
1773    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1774    per row in the matrix. For good matrix assembly performance the
1775    user should preallocate the matrix storage by setting the parameter nz
1776    (or the array nnz).  By setting these parameters accurately, performance
1777    during matrix assembly can be increased by more than a factor of 50.
1778 
1779    Collective on MPI_Comm
1780 
1781    Input Parameters:
1782 +  A - the matrix
1783 .  bs - size of block
1784 .  nz - number of block nonzeros per block row (same for all rows)
1785 -  nnz - array containing the number of block nonzeros in the various block rows
1786          (possibly different for each block row) or PETSC_NULL
1787 
1788    Options Database Keys:
1789 .   -mat_no_unroll - uses code that does not unroll the loops in the
1790                      block calculations (much slower)
1791 .    -mat_block_size - size of the blocks to use
1792 
1793    Level: intermediate
1794 
1795    Notes:
1796    The block AIJ format is fully compatible with standard Fortran 77
1797    storage.  That is, the stored row and column indices can begin at
1798    either one (as in Fortran) or zero.  See the users' manual for details.
1799 
1800    Specify the preallocated storage with either nz or nnz (not both).
1801    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1802    allocation.  For additional details, see the users manual chapter on
1803    matrices.
1804 
1805 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1806 @*/
1807 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1808 {
1809   Mat_SeqBAIJ *b;
1810   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1811   PetscTruth  flg;
1812 
1813   PetscFunctionBegin;
1814   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr);
1815   if (!flg) PetscFunctionReturn(0);
1816 
1817   B->preallocated = PETSC_TRUE;
1818   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1819   if (nnz && newbs != bs) {
1820     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1821   }
1822   bs   = newbs;
1823 
1824   mbs  = B->m/bs;
1825   nbs  = B->n/bs;
1826   bs2  = bs*bs;
1827 
1828   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1829     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1830   }
1831 
1832   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1833   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1834   if (nnz) {
1835     for (i=0; i<mbs; i++) {
1836       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1837       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1838     }
1839   }
1840 
1841   b       = (Mat_SeqBAIJ*)B->data;
1842   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1843   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1844   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1845   if (!flg) {
1846     switch (bs) {
1847     case 1:
1848       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1849       B->ops->mult            = MatMult_SeqBAIJ_1;
1850       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1851       break;
1852     case 2:
1853       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1854       B->ops->mult            = MatMult_SeqBAIJ_2;
1855       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1856       break;
1857     case 3:
1858       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1859       B->ops->mult            = MatMult_SeqBAIJ_3;
1860       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1861       break;
1862     case 4:
1863       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1864       B->ops->mult            = MatMult_SeqBAIJ_4;
1865       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1866       break;
1867     case 5:
1868       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1869       B->ops->mult            = MatMult_SeqBAIJ_5;
1870       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1871       break;
1872     case 6:
1873       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1874       B->ops->mult            = MatMult_SeqBAIJ_6;
1875       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1876       break;
1877     case 7:
1878       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1879       B->ops->mult            = MatMult_SeqBAIJ_7;
1880       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1881       break;
1882     default:
1883       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1884       B->ops->mult            = MatMult_SeqBAIJ_N;
1885       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1886       break;
1887     }
1888   }
1889   b->bs      = bs;
1890   b->mbs     = mbs;
1891   b->nbs     = nbs;
1892   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1893   if (!nnz) {
1894     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1895     else if (nz <= 0)        nz = 1;
1896     for (i=0; i<mbs; i++) b->imax[i] = nz;
1897     nz = nz*mbs;
1898   } else {
1899     nz = 0;
1900     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1901   }
1902 
1903   /* allocate the matrix space */
1904   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1905   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1906   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1907   b->j  = (int*)(b->a + nz*bs2);
1908   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1909   b->i  = b->j + nz;
1910   b->singlemalloc = PETSC_TRUE;
1911 
1912   b->i[0] = 0;
1913   for (i=1; i<mbs+1; i++) {
1914     b->i[i] = b->i[i-1] + b->imax[i-1];
1915   }
1916 
1917   /* b->ilen will count nonzeros in each block row so far. */
1918   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1919   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1920   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1921 
1922   b->bs               = bs;
1923   b->bs2              = bs2;
1924   b->mbs              = mbs;
1925   b->nz               = 0;
1926   b->maxnz            = nz*bs2;
1927   B->info.nz_unneeded = (PetscReal)b->maxnz;
1928   PetscFunctionReturn(0);
1929 }
1930 
1931