xref: /petsc/src/mat/impls/baij/seq/baij.c (revision f1e2ffcdf65679ed52caf95ee2fbac3db31d26d8)
1 /*$Id: baij.c,v 1.192 1999/11/20 18:36:47 bsmith Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "sys.h"
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNC__
18 #define __FUNC__ "MatMissingDiagonal_SeqBAIJ"
19 int MatMissingDiagonal_SeqBAIJ(Mat A)
20 {
21   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
22   int         *diag, *jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for ( i=0; i<a->mbs; i++ ) {
28     if (jj[diag[i]] != i) {
29       SETERRQ1(1,1,"Matrix is missing diagonal number %d",i);
30     }
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 #undef __FUNC__
36 #define __FUNC__ "MatMarkDiagonal_SeqBAIJ"
37 int MatMarkDiagonal_SeqBAIJ(Mat A)
38 {
39   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
40   int         i,j, *diag, m = a->mbs;
41 
42   PetscFunctionBegin;
43   if (a->diag) PetscFunctionReturn(0);
44 
45   diag = (int *) PetscMalloc( (m+1)*sizeof(int));CHKPTRQ(diag);
46   PLogObjectMemory(A,(m+1)*sizeof(int));
47   for ( i=0; i<m; i++ ) {
48     diag[i] = a->i[i+1];
49     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
50       if (a->j[j] == i) {
51         diag[i] = j;
52         break;
53       }
54     }
55   }
56   a->diag = diag;
57   PetscFunctionReturn(0);
58 }
59 
60 
61 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
62 
63 #undef __FUNC__
64 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
65 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
66                             PetscTruth *done)
67 {
68   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
69   int         ierr, n = a->mbs,i;
70 
71   PetscFunctionBegin;
72   *nn = n;
73   if (!ia) PetscFunctionReturn(0);
74   if (symmetric) {
75     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
76   } else if (oshift == 1) {
77     /* temporarily add 1 to i and j indices */
78     int nz = a->i[n] + 1;
79     for ( i=0; i<nz; i++ ) a->j[i]++;
80     for ( i=0; i<n+1; i++ ) a->i[i]++;
81     *ia = a->i; *ja = a->j;
82   } else {
83     *ia = a->i; *ja = a->j;
84   }
85 
86   PetscFunctionReturn(0);
87 }
88 
89 #undef __FUNC__
90 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
91 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
92                                 PetscTruth *done)
93 {
94   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
95   int         i,n = a->mbs,ierr;
96 
97   PetscFunctionBegin;
98   if (!ia) PetscFunctionReturn(0);
99   if (symmetric) {
100     ierr = PetscFree(*ia);CHKERRQ(ierr);
101     ierr = PetscFree(*ja);CHKERRQ(ierr);
102   } else if (oshift == 1) {
103     int nz = a->i[n];
104     for ( i=0; i<nz; i++ ) a->j[i]--;
105     for ( i=0; i<n+1; i++ ) a->i[i]--;
106   }
107   PetscFunctionReturn(0);
108 }
109 
110 #undef __FUNC__
111 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
112 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
113 {
114   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
115 
116   PetscFunctionBegin;
117   *bs = baij->bs;
118   PetscFunctionReturn(0);
119 }
120 
121 
122 #undef __FUNC__
123 #define __FUNC__ "MatDestroy_SeqBAIJ"
124 int MatDestroy_SeqBAIJ(Mat A)
125 {
126   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
127   int         ierr;
128 
129   PetscFunctionBegin;
130   if (A->mapping) {
131     ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr);
132   }
133   if (A->bmapping) {
134     ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr);
135   }
136   if (A->rmap) {
137     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
138   }
139   if (A->cmap) {
140     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
141   }
142 #if defined(PETSC_USE_LOG)
143   PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
144 #endif
145   ierr = PetscFree(a->a);CHKERRQ(ierr);
146   if (!a->singlemalloc) {
147     ierr = PetscFree(a->i);CHKERRQ(ierr);
148     ierr = PetscFree(a->j);CHKERRQ(ierr);
149   }
150   if (a->row) {
151     ierr = ISDestroy(a->row);CHKERRQ(ierr);
152   }
153   if (a->col) {
154     ierr = ISDestroy(a->col);CHKERRQ(ierr);
155   }
156   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
157   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
158   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
159   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
160   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
161   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
162   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
163   ierr = PetscFree(a);CHKERRQ(ierr);
164   PLogObjectDestroy(A);
165   PetscHeaderDestroy(A);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNC__
170 #define __FUNC__ "MatSetOption_SeqBAIJ"
171 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
172 {
173   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
174 
175   PetscFunctionBegin;
176   if      (op == MAT_ROW_ORIENTED)                 a->roworiented    = PETSC_TRUE;
177   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented    = PETSC_FALSE;
178   else if (op == MAT_COLUMNS_SORTED)               a->sorted         = PETSC_TRUE;
179   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted         = PETSC_FALSE;
180   else if (op == MAT_KEEP_ZEROED_ROWS)             a->keepzeroedrows = PETSC_TRUE;
181   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew          = 1;
182   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew          = -1;
183   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew          = -2;
184   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew          = 0;
185   else if (op == MAT_ROWS_SORTED ||
186            op == MAT_ROWS_UNSORTED ||
187            op == MAT_SYMMETRIC ||
188            op == MAT_STRUCTURALLY_SYMMETRIC ||
189            op == MAT_YES_NEW_DIAGONALS ||
190            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
191            op == MAT_USE_HASH_TABLE) {
192     PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
193   } else if (op == MAT_NO_NEW_DIAGONALS) {
194     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
195   } else {
196     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
197   }
198   PetscFunctionReturn(0);
199 }
200 
201 
202 #undef __FUNC__
203 #define __FUNC__ "MatGetSize_SeqBAIJ"
204 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
205 {
206   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
207 
208   PetscFunctionBegin;
209   if (m) *m = a->m;
210   if (n) *n = a->n;
211   PetscFunctionReturn(0);
212 }
213 
214 #undef __FUNC__
215 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
216 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
217 {
218   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
219 
220   PetscFunctionBegin;
221   *m = 0; *n = a->m;
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNC__
226 #define __FUNC__ "MatGetRow_SeqBAIJ"
227 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
228 {
229   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ *) A->data;
230   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
231   MatScalar    *aa,*aa_i;
232   Scalar       *v_i;
233 
234   PetscFunctionBegin;
235   bs  = a->bs;
236   ai  = a->i;
237   aj  = a->j;
238   aa  = a->a;
239   bs2 = a->bs2;
240 
241   if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range");
242 
243   bn  = row/bs;   /* Block number */
244   bp  = row % bs; /* Block Position */
245   M   = ai[bn+1] - ai[bn];
246   *nz = bs*M;
247 
248   if (v) {
249     *v = 0;
250     if (*nz) {
251       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) );CHKPTRQ(*v);
252       for ( i=0; i<M; i++ ) { /* for each block in the block row */
253         v_i  = *v + i*bs;
254         aa_i = aa + bs2*(ai[bn] + i);
255         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
256       }
257     }
258   }
259 
260   if (idx) {
261     *idx = 0;
262     if (*nz) {
263       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) );CHKPTRQ(*idx);
264       for ( i=0; i<M; i++ ) { /* for each block in the block row */
265         idx_i = *idx + i*bs;
266         itmp  = bs*aj[ai[bn] + i];
267         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
268       }
269     }
270   }
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNC__
275 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
276 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
277 {
278   int ierr;
279 
280   PetscFunctionBegin;
281   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
282   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
283   PetscFunctionReturn(0);
284 }
285 
286 #undef __FUNC__
287 #define __FUNC__ "MatTranspose_SeqBAIJ"
288 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
289 {
290   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
291   Mat         C;
292   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
293   int         *rows,*cols,bs2=a->bs2;
294   MatScalar   *array = a->a;
295 
296   PetscFunctionBegin;
297   if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place");
298   col  = (int *) PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col);
299   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
300 
301   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
302   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
303   ierr = PetscFree(col);CHKERRQ(ierr);
304   rows = (int *) PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows);
305   cols = rows + bs;
306   for ( i=0; i<mbs; i++ ) {
307     cols[0] = i*bs;
308     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
309     len = ai[i+1] - ai[i];
310     for ( j=0; j<len; j++ ) {
311       rows[0] = (*aj++)*bs;
312       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
313       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
314       array += bs2;
315     }
316   }
317   ierr = PetscFree(rows);CHKERRQ(ierr);
318 
319   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321 
322   if (B) {
323     *B = C;
324   } else {
325     PetscOps *Abops;
326     MatOps   Aops;
327 
328     /* This isn't really an in-place transpose */
329     ierr = PetscFree(a->a);CHKERRQ(ierr);
330     if (!a->singlemalloc) {
331       ierr = PetscFree(a->i);CHKERRQ(ierr);
332       ierr = PetscFree(a->j);CHKERRQ(ierr);
333     }
334     if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
335     if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
336     if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
337     if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
338     ierr = PetscFree(a);CHKERRQ(ierr);
339 
340 
341     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
342     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
343 
344     /*
345        This is horrible, horrible code. We need to keep the
346       A pointers for the bops and ops but copy everything
347       else from C.
348     */
349     Abops    = A->bops;
350     Aops     = A->ops;
351     ierr     = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr);
352     A->bops  = Abops;
353     A->ops   = Aops;
354     A->qlist = 0;
355 
356     PetscHeaderDestroy(C);
357   }
358   PetscFunctionReturn(0);
359 }
360 
361 #undef __FUNC__
362 #define __FUNC__ "MatView_SeqBAIJ_Binary"
363 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
364 {
365   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
366   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
367   Scalar      *aa;
368   FILE        *file;
369 
370   PetscFunctionBegin;
371   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
372   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
373   col_lens[0] = MAT_COOKIE;
374 
375   col_lens[1] = a->m;
376   col_lens[2] = a->n;
377   col_lens[3] = a->nz*bs2;
378 
379   /* store lengths of each row and write (including header) to file */
380   count = 0;
381   for ( i=0; i<a->mbs; i++ ) {
382     for ( j=0; j<bs; j++ ) {
383       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
384     }
385   }
386   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr);
387   ierr = PetscFree(col_lens);CHKERRQ(ierr);
388 
389   /* store column indices (zero start index) */
390   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) );CHKPTRQ(jj);
391   count = 0;
392   for ( i=0; i<a->mbs; i++ ) {
393     for ( j=0; j<bs; j++ ) {
394       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
395         for ( l=0; l<bs; l++ ) {
396           jj[count++] = bs*a->j[k] + l;
397         }
398       }
399     }
400   }
401   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
402   ierr = PetscFree(jj);CHKERRQ(ierr);
403 
404   /* store nonzero values */
405   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa);
406   count = 0;
407   for ( i=0; i<a->mbs; i++ ) {
408     for ( j=0; j<bs; j++ ) {
409       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
410         for ( l=0; l<bs; l++ ) {
411           aa[count++] = a->a[bs2*k + l*bs + j];
412         }
413       }
414     }
415   }
416   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
417   ierr = PetscFree(aa);CHKERRQ(ierr);
418 
419   ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
420   if (file) {
421     fprintf(file,"-matload_block_size %d\n",a->bs);
422   }
423   PetscFunctionReturn(0);
424 }
425 
426 #undef __FUNC__
427 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
428 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
429 {
430   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
431   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
432   char        *outputname;
433 
434   PetscFunctionBegin;
435   ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr);
436   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
437   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
438     ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
439   } else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
440     SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported");
441   } else if (format == VIEWER_FORMAT_ASCII_COMMON) {
442     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
443     for ( i=0; i<a->mbs; i++ ) {
444       for ( j=0; j<bs; j++ ) {
445         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
446         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
447           for ( l=0; l<bs; l++ ) {
448 #if defined(PETSC_USE_COMPLEX)
449             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
450               ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
451                       PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
452             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
453               ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
454                       PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
455             } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
456               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
457             }
458 #else
459             if (a->a[bs2*k + l*bs + j] != 0.0) {
460               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
461             }
462 #endif
463           }
464         }
465         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
466       }
467     }
468     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
469   } else {
470     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
471     for ( i=0; i<a->mbs; i++ ) {
472       for ( j=0; j<bs; j++ ) {
473         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
474         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
475           for ( l=0; l<bs; l++ ) {
476 #if defined(PETSC_USE_COMPLEX)
477             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) {
478               ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
479                 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
480             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) {
481               ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
482                 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
483             } else {
484               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
485             }
486 #else
487             ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
488 #endif
489           }
490         }
491         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
492       }
493     }
494     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
495   }
496   ierr = ViewerFlush(viewer);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNC__
501 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom"
502 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa)
503 {
504   Mat          A = (Mat) Aa;
505   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
506   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
507   double       xl,yl,xr,yr,x_l,x_r,y_l,y_r;
508   MatScalar    *aa;
509   MPI_Comm     comm;
510   Viewer       viewer;
511 
512   PetscFunctionBegin;
513   /*
514       This is nasty. If this is called from an originally parallel matrix
515    then all processes call this, but only the first has the matrix so the
516    rest should return immediately.
517   */
518   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
519   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
520   if (rank) PetscFunctionReturn(0);
521 
522   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr);
523 
524   ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
525 
526   /* loop over matrix elements drawing boxes */
527   color = DRAW_BLUE;
528   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
529     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
530       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
531       x_l = a->j[j]*bs; x_r = x_l + 1.0;
532       aa = a->a + j*bs2;
533       for ( k=0; k<bs; k++ ) {
534         for ( l=0; l<bs; l++ ) {
535           if (PetscReal(*aa++) >=  0.) continue;
536           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
537         }
538       }
539     }
540   }
541   color = DRAW_CYAN;
542   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
543     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
544       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
545       x_l = a->j[j]*bs; x_r = x_l + 1.0;
546       aa = a->a + j*bs2;
547       for ( k=0; k<bs; k++ ) {
548         for ( l=0; l<bs; l++ ) {
549           if (PetscReal(*aa++) != 0.) continue;
550           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
551         }
552       }
553     }
554   }
555 
556   color = DRAW_RED;
557   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
558     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
559       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
560       x_l = a->j[j]*bs; x_r = x_l + 1.0;
561       aa = a->a + j*bs2;
562       for ( k=0; k<bs; k++ ) {
563         for ( l=0; l<bs; l++ ) {
564           if (PetscReal(*aa++) <= 0.) continue;
565           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
566         }
567       }
568     }
569   }
570   PetscFunctionReturn(0);
571 }
572 
573 #undef __FUNC__
574 #define __FUNC__ "MatView_SeqBAIJ_Draw"
575 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
576 {
577   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
578   int          ierr;
579   double       xl,yl,xr,yr,w,h;
580   Draw         draw;
581   PetscTruth   isnull;
582 
583   PetscFunctionBegin;
584 
585   ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
586   ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
587 
588   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
589   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
590   xr += w;    yr += h;  xl = -w;     yl = -h;
591   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
592   ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
593   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
594   PetscFunctionReturn(0);
595 }
596 
597 #undef __FUNC__
598 #define __FUNC__ "MatView_SeqBAIJ"
599 int MatView_SeqBAIJ(Mat A,Viewer viewer)
600 {
601   int        ierr;
602   PetscTruth issocket,isascii,isbinary,isdraw;
603 
604   PetscFunctionBegin;
605   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
606   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
607   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
608   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
609   if (issocket) {
610     SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported");
611   } else if (isascii){
612     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
613   } else if (isbinary) {
614     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
615   } else if (isdraw) {
616     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
617   } else {
618     SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
619   }
620   PetscFunctionReturn(0);
621 }
622 
623 
624 #undef __FUNC__
625 #define __FUNC__ "MatGetValues_SeqBAIJ"
626 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
627 {
628   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
629   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
630   int        *ai = a->i, *ailen = a->ilen;
631   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
632   MatScalar  *ap, *aa = a->a, zero = 0.0;
633 
634   PetscFunctionBegin;
635   for ( k=0; k<m; k++ ) { /* loop over rows */
636     row  = im[k]; brow = row/bs;
637     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
638     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
639     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
640     nrow = ailen[brow];
641     for ( l=0; l<n; l++ ) { /* loop over columns */
642       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
643       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
644       col  = in[l] ;
645       bcol = col/bs;
646       cidx = col%bs;
647       ridx = row%bs;
648       high = nrow;
649       low  = 0; /* assume unsorted */
650       while (high-low > 5) {
651         t = (low+high)/2;
652         if (rp[t] > bcol) high = t;
653         else             low  = t;
654       }
655       for ( i=low; i<high; i++ ) {
656         if (rp[i] > bcol) break;
657         if (rp[i] == bcol) {
658           *v++ = ap[bs2*i+bs*cidx+ridx];
659           goto finished;
660         }
661       }
662       *v++ = zero;
663       finished:;
664     }
665   }
666   PetscFunctionReturn(0);
667 }
668 
669 
670 #undef __FUNC__
671 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
672 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
673 {
674   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
675   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
676   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented;
677   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
678   Scalar      *value = v;
679   MatScalar   *ap,*aa=a->a,*bap;
680 
681   PetscFunctionBegin;
682   if (roworiented) {
683     stepval = (n-1)*bs;
684   } else {
685     stepval = (m-1)*bs;
686   }
687   for ( k=0; k<m; k++ ) { /* loop over added rows */
688     row  = im[k];
689     if (row < 0) continue;
690 #if defined(PETSC_USE_BOPT_g)
691     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
692 #endif
693     rp   = aj + ai[row];
694     ap   = aa + bs2*ai[row];
695     rmax = imax[row];
696     nrow = ailen[row];
697     low  = 0;
698     for ( l=0; l<n; l++ ) { /* loop over added columns */
699       if (in[l] < 0) continue;
700 #if defined(PETSC_USE_BOPT_g)
701       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
702 #endif
703       col = in[l];
704       if (roworiented) {
705         value = v + k*(stepval+bs)*bs + l*bs;
706       } else {
707         value = v + l*(stepval+bs)*bs + k*bs;
708       }
709       if (!sorted) low = 0; high = nrow;
710       while (high-low > 7) {
711         t = (low+high)/2;
712         if (rp[t] > col) high = t;
713         else             low  = t;
714       }
715       for ( i=low; i<high; i++ ) {
716         if (rp[i] > col) break;
717         if (rp[i] == col) {
718           bap  = ap +  bs2*i;
719           if (roworiented) {
720             if (is == ADD_VALUES) {
721               for ( ii=0; ii<bs; ii++,value+=stepval ) {
722                 for (jj=ii; jj<bs2; jj+=bs ) {
723                   bap[jj] += *value++;
724                 }
725               }
726             } else {
727               for ( ii=0; ii<bs; ii++,value+=stepval ) {
728                 for (jj=ii; jj<bs2; jj+=bs ) {
729                   bap[jj] = *value++;
730                 }
731               }
732             }
733           } else {
734             if (is == ADD_VALUES) {
735               for ( ii=0; ii<bs; ii++,value+=stepval ) {
736                 for (jj=0; jj<bs; jj++ ) {
737                   *bap++ += *value++;
738                 }
739               }
740             } else {
741               for ( ii=0; ii<bs; ii++,value+=stepval ) {
742                 for (jj=0; jj<bs; jj++ ) {
743                   *bap++  = *value++;
744                 }
745               }
746             }
747           }
748           goto noinsert2;
749         }
750       }
751       if (nonew == 1) goto noinsert2;
752       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
753       if (nrow >= rmax) {
754         /* there is no extra room in row, therefore enlarge */
755         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
756         MatScalar *new_a;
757 
758         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
759 
760         /* malloc new storage space */
761         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
762         new_a   = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a);
763         new_j   = (int *) (new_a + bs2*new_nz);
764         new_i   = new_j + new_nz;
765 
766         /* copy over old data into new slots */
767         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
768         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
769         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
770         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
771         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
772         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
773         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
774         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
775         /* free up old matrix storage */
776         ierr = PetscFree(a->a);CHKERRQ(ierr);
777         if (!a->singlemalloc) {
778           ierr = PetscFree(a->i);CHKERRQ(ierr);
779           ierr = PetscFree(a->j);CHKERRQ(ierr);
780         }
781         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
782         a->singlemalloc = PETSC_TRUE;
783 
784         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
785         rmax = imax[row] = imax[row] + CHUNKSIZE;
786         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
787         a->maxnz += bs2*CHUNKSIZE;
788         a->reallocs++;
789         a->nz++;
790       }
791       N = nrow++ - 1;
792       /* shift up all the later entries in this row */
793       for ( ii=N; ii>=i; ii-- ) {
794         rp[ii+1] = rp[ii];
795         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
796       }
797       if (N >= i) {
798         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
799       }
800       rp[i] = col;
801       bap   = ap +  bs2*i;
802       if (roworiented) {
803         for ( ii=0; ii<bs; ii++,value+=stepval) {
804           for (jj=ii; jj<bs2; jj+=bs ) {
805             bap[jj] = *value++;
806           }
807         }
808       } else {
809         for ( ii=0; ii<bs; ii++,value+=stepval ) {
810           for (jj=0; jj<bs; jj++ ) {
811             *bap++  = *value++;
812           }
813         }
814       }
815       noinsert2:;
816       low = i;
817     }
818     ailen[row] = nrow;
819   }
820   PetscFunctionReturn(0);
821 }
822 
823 
824 #undef __FUNC__
825 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
826 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
827 {
828   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
829   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
830   int        m = a->m,*ip, N, *ailen = a->ilen;
831   int        mbs = a->mbs, bs2 = a->bs2,rmax = 0,ierr;
832   MatScalar  *aa = a->a, *ap;
833 
834   PetscFunctionBegin;
835   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
836 
837   if (m) rmax = ailen[0];
838   for ( i=1; i<mbs; i++ ) {
839     /* move each row back by the amount of empty slots (fshift) before it*/
840     fshift += imax[i-1] - ailen[i-1];
841     rmax   = PetscMax(rmax,ailen[i]);
842     if (fshift) {
843       ip = aj + ai[i]; ap = aa + bs2*ai[i];
844       N = ailen[i];
845       for ( j=0; j<N; j++ ) {
846         ip[j-fshift] = ip[j];
847         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
848       }
849     }
850     ai[i] = ai[i-1] + ailen[i-1];
851   }
852   if (mbs) {
853     fshift += imax[mbs-1] - ailen[mbs-1];
854     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
855   }
856   /* reset ilen and imax for each row */
857   for ( i=0; i<mbs; i++ ) {
858     ailen[i] = imax[i] = ai[i+1] - ai[i];
859   }
860   a->nz = ai[mbs];
861 
862   /* diagonals may have moved, so kill the diagonal pointers */
863   if (fshift && a->diag) {
864     ierr = PetscFree(a->diag);CHKERRQ(ierr);
865     PLogObjectMemory(A,-(m+1)*sizeof(int));
866     a->diag = 0;
867   }
868   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
869            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
870   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
871            a->reallocs);
872   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
873   a->reallocs          = 0;
874   A->info.nz_unneeded  = (double)fshift*bs2;
875 
876   PetscFunctionReturn(0);
877 }
878 
879 
880 
881 /*
882    This function returns an array of flags which indicate the locations of contiguous
883    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
884    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
885    Assume: sizes should be long enough to hold all the values.
886 */
887 #undef __FUNC__
888 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks"
889 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
890 {
891   int        i,j,k,row;
892   PetscTruth flg;
893 
894   PetscFunctionBegin;
895   for ( i=0,j=0; i<n; j++ ) {
896     row = idx[i];
897     if (row%bs!=0) { /* Not the begining of a block */
898       sizes[j] = 1;
899       i++;
900     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
901       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
902       i++;
903     } else { /* Begining of the block, so check if the complete block exists */
904       flg = PETSC_TRUE;
905       for ( k=1; k<bs; k++ ) {
906         if (row+k != idx[i+k]) { /* break in the block */
907           flg = PETSC_FALSE;
908           break;
909         }
910       }
911       if (flg == PETSC_TRUE) { /* No break in the bs */
912         sizes[j] = bs;
913         i+= bs;
914       } else {
915         sizes[j] = 1;
916         i++;
917       }
918     }
919   }
920   *bs_max = j;
921   PetscFunctionReturn(0);
922 }
923 
924 #undef __FUNC__
925 #define __FUNC__ "MatZeroRows_SeqBAIJ"
926 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
927 {
928   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
929   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
930   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
931   Scalar      zero = 0.0;
932   MatScalar   *aa;
933 
934   PetscFunctionBegin;
935   /* Make a copy of the IS and  sort it */
936   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
937   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
938 
939   /* allocate memory for rows,sizes */
940   rows  = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows);
941   sizes = rows + is_n;
942 
943   /* initialize copy IS valurs to rows, and sort them */
944   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
945   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
946   if (baij->keepzeroedrows) {
947     for ( i=0; i<is_n; i++ ) { sizes[i] = 1; }
948     bs_max = is_n;
949   } else {
950     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
951   }
952   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
953 
954   for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) {
955     row   = rows[j];
956     if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row);
957     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
958     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
959     if (sizes[i] == bs && !baij->keepzeroedrows) {
960       if (diag) {
961         if (baij->ilen[row/bs] > 0) {
962           baij->ilen[row/bs] = 1;
963           baij->j[baij->i[row/bs]] = row/bs;
964           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
965         }
966         /* Now insert all the diagoanl values for this bs */
967         for ( k=0; k<bs; k++ ) {
968           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
969         }
970       } else { /* (!diag) */
971         baij->ilen[row/bs] = 0;
972       } /* end (!diag) */
973     } else { /* (sizes[i] != bs) */
974 #if defined (PETSC_USE_DEBUG)
975       if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1");
976 #endif
977       for ( k=0; k<count; k++ ) {
978         aa[0] = zero;
979         aa+=bs;
980       }
981       if (diag) {
982         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
983       }
984     }
985   }
986 
987   ierr = PetscFree(rows);CHKERRQ(ierr);
988   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNC__
993 #define __FUNC__ "MatSetValues_SeqBAIJ"
994 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
995 {
996   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
997   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
998   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
999   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1000   int         ridx,cidx,bs2=a->bs2,ierr;
1001   MatScalar   *ap,value,*aa=a->a,*bap;
1002 
1003   PetscFunctionBegin;
1004   for ( k=0; k<m; k++ ) { /* loop over added rows */
1005     row  = im[k]; brow = row/bs;
1006     if (row < 0) continue;
1007 #if defined(PETSC_USE_BOPT_g)
1008     if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m);
1009 #endif
1010     rp   = aj + ai[brow];
1011     ap   = aa + bs2*ai[brow];
1012     rmax = imax[brow];
1013     nrow = ailen[brow];
1014     low  = 0;
1015     for ( l=0; l<n; l++ ) { /* loop over added columns */
1016       if (in[l] < 0) continue;
1017 #if defined(PETSC_USE_BOPT_g)
1018       if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n);
1019 #endif
1020       col = in[l]; bcol = col/bs;
1021       ridx = row % bs; cidx = col % bs;
1022       if (roworiented) {
1023         value = v[l + k*n];
1024       } else {
1025         value = v[k + l*m];
1026       }
1027       if (!sorted) low = 0; high = nrow;
1028       while (high-low > 7) {
1029         t = (low+high)/2;
1030         if (rp[t] > bcol) high = t;
1031         else              low  = t;
1032       }
1033       for ( i=low; i<high; i++ ) {
1034         if (rp[i] > bcol) break;
1035         if (rp[i] == bcol) {
1036           bap  = ap +  bs2*i + bs*cidx + ridx;
1037           if (is == ADD_VALUES) *bap += value;
1038           else                  *bap  = value;
1039           goto noinsert1;
1040         }
1041       }
1042       if (nonew == 1) goto noinsert1;
1043       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1044       if (nrow >= rmax) {
1045         /* there is no extra room in row, therefore enlarge */
1046         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1047         MatScalar *new_a;
1048 
1049         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1050 
1051         /* Malloc new storage space */
1052         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1053         new_a   = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a);
1054         new_j   = (int *) (new_a + bs2*new_nz);
1055         new_i   = new_j + new_nz;
1056 
1057         /* copy over old data into new slots */
1058         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
1059         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1060         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1061         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1062         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1063         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1064         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1065         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1066         /* free up old matrix storage */
1067         ierr = PetscFree(a->a);CHKERRQ(ierr);
1068         if (!a->singlemalloc) {
1069           ierr = PetscFree(a->i);CHKERRQ(ierr);
1070           ierr = PetscFree(a->j);CHKERRQ(ierr);
1071         }
1072         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1073         a->singlemalloc = PETSC_TRUE;
1074 
1075         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1076         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1077         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1078         a->maxnz += bs2*CHUNKSIZE;
1079         a->reallocs++;
1080         a->nz++;
1081       }
1082       N = nrow++ - 1;
1083       /* shift up all the later entries in this row */
1084       for ( ii=N; ii>=i; ii-- ) {
1085         rp[ii+1] = rp[ii];
1086         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1087       }
1088       if (N>=i) {
1089         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1090       }
1091       rp[i]                      = bcol;
1092       ap[bs2*i + bs*cidx + ridx] = value;
1093       noinsert1:;
1094       low = i;
1095     }
1096     ailen[brow] = nrow;
1097   }
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1102 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1103 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1104 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1105 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1106 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec);
1107 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1108 extern int MatScale_SeqBAIJ(Scalar*,Mat);
1109 extern int MatNorm_SeqBAIJ(Mat,NormType,double *);
1110 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*);
1111 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1112 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1113 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1114 extern int MatZeroEntries_SeqBAIJ(Mat);
1115 
1116 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1117 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1118 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1119 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1120 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1121 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1122 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
1123 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1124 extern int MatSolveTrans_SeqBAIJ_7(Mat,Vec,Vec);
1125 extern int MatSolveTrans_SeqBAIJ_6(Mat,Vec,Vec);
1126 extern int MatSolveTrans_SeqBAIJ_5(Mat,Vec,Vec);
1127 extern int MatSolveTrans_SeqBAIJ_4(Mat,Vec,Vec);
1128 extern int MatSolveTrans_SeqBAIJ_3(Mat,Vec,Vec);
1129 extern int MatSolveTrans_SeqBAIJ_2(Mat,Vec,Vec);
1130 extern int MatSolveTrans_SeqBAIJ_1(Mat,Vec,Vec);
1131 
1132 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1133 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1134 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1135 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1136 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1137 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1138 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*);
1139 
1140 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1141 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1142 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1143 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1144 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1145 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec);
1146 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1147 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1148 
1149 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1150 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1151 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1152 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1153 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1154 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
1155 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1156 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1157 
1158 #undef __FUNC__
1159 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1160 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1161 {
1162   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1163   Mat         outA;
1164   int         ierr;
1165   PetscTruth  row_identity, col_identity;
1166 
1167   PetscFunctionBegin;
1168   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU");
1169   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1170   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1171   if (!row_identity || !col_identity) {
1172     SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU");
1173   }
1174 
1175   outA          = inA;
1176   inA->factor   = FACTOR_LU;
1177 
1178   if (!a->diag) {
1179     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1180   }
1181   /*
1182       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1183       for ILU(0) factorization with natural ordering
1184   */
1185   switch (a->bs) {
1186   case 1:
1187     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_2_NaturalOrdering;
1188     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1189   case 2:
1190     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1191     inA->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1192     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_2_NaturalOrdering;
1193     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1194     break;
1195   case 3:
1196     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1197     inA->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1198     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_3_NaturalOrdering;
1199     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1200     break;
1201   case 4:
1202     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1203     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1204     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_4_NaturalOrdering;
1205     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1206     break;
1207   case 5:
1208     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1209     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1210     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_5_NaturalOrdering;
1211     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1212     break;
1213   case 6:
1214     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1215     inA->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1216     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_6_NaturalOrdering;
1217     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1218     break;
1219   case 7:
1220     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1221     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_7_NaturalOrdering;
1222     inA->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1223     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1224     break;
1225   default:
1226     a->row        = row;
1227     a->col        = col;
1228     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1229     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1230 
1231     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1232     ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr);
1233     PLogObjectParent(inA,a->icol);
1234 
1235     if (!a->solve_work) {
1236       a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1237       PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1238     }
1239   }
1240 
1241   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1242 
1243   PetscFunctionReturn(0);
1244 }
1245 #undef __FUNC__
1246 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1247 int MatPrintHelp_SeqBAIJ(Mat A)
1248 {
1249   static PetscTruth called = PETSC_FALSE;
1250   MPI_Comm          comm = A->comm;
1251   int               ierr;
1252 
1253   PetscFunctionBegin;
1254   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1255   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1256   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 EXTERN_C_BEGIN
1261 #undef __FUNC__
1262 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1263 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1264 {
1265   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1266   int         i,nz,n;
1267 
1268   PetscFunctionBegin;
1269   nz = baij->maxnz;
1270   n  = baij->n;
1271   for (i=0; i<nz; i++) {
1272     baij->j[i] = indices[i];
1273   }
1274   baij->nz = nz;
1275   for ( i=0; i<n; i++ ) {
1276     baij->ilen[i] = baij->imax[i];
1277   }
1278 
1279   PetscFunctionReturn(0);
1280 }
1281 EXTERN_C_END
1282 
1283 #undef __FUNC__
1284 #define __FUNC__ "MatSeqBAIJSetColumnIndices"
1285 /*@
1286     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1287        in the matrix.
1288 
1289   Input Parameters:
1290 +  mat - the SeqBAIJ matrix
1291 -  indices - the column indices
1292 
1293   Level: advanced
1294 
1295   Notes:
1296     This can be called if you have precomputed the nonzero structure of the
1297   matrix and want to provide it to the matrix object to improve the performance
1298   of the MatSetValues() operation.
1299 
1300     You MUST have set the correct numbers of nonzeros per row in the call to
1301   MatCreateSeqBAIJ().
1302 
1303     MUST be called before any calls to MatSetValues();
1304 
1305 @*/
1306 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1307 {
1308   int ierr,(*f)(Mat,int *);
1309 
1310   PetscFunctionBegin;
1311   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1312   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr);
1313   if (f) {
1314     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1315   } else {
1316     SETERRQ(1,1,"Wrong type of matrix to set column indices");
1317   }
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 /* -------------------------------------------------------------------*/
1322 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1323        MatGetRow_SeqBAIJ,
1324        MatRestoreRow_SeqBAIJ,
1325        MatMult_SeqBAIJ_N,
1326        MatMultAdd_SeqBAIJ_N,
1327        MatMultTrans_SeqBAIJ,
1328        MatMultTransAdd_SeqBAIJ,
1329        MatSolve_SeqBAIJ_N,
1330        0,
1331        0,
1332        0,
1333        MatLUFactor_SeqBAIJ,
1334        0,
1335        0,
1336        MatTranspose_SeqBAIJ,
1337        MatGetInfo_SeqBAIJ,
1338        MatEqual_SeqBAIJ,
1339        MatGetDiagonal_SeqBAIJ,
1340        MatDiagonalScale_SeqBAIJ,
1341        MatNorm_SeqBAIJ,
1342        0,
1343        MatAssemblyEnd_SeqBAIJ,
1344        0,
1345        MatSetOption_SeqBAIJ,
1346        MatZeroEntries_SeqBAIJ,
1347        MatZeroRows_SeqBAIJ,
1348        MatLUFactorSymbolic_SeqBAIJ,
1349        MatLUFactorNumeric_SeqBAIJ_N,
1350        0,
1351        0,
1352        MatGetSize_SeqBAIJ,
1353        MatGetSize_SeqBAIJ,
1354        MatGetOwnershipRange_SeqBAIJ,
1355        MatILUFactorSymbolic_SeqBAIJ,
1356        0,
1357        0,
1358        0,
1359        MatDuplicate_SeqBAIJ,
1360        0,
1361        0,
1362        MatILUFactor_SeqBAIJ,
1363        0,
1364        0,
1365        MatGetSubMatrices_SeqBAIJ,
1366        MatIncreaseOverlap_SeqBAIJ,
1367        MatGetValues_SeqBAIJ,
1368        0,
1369        MatPrintHelp_SeqBAIJ,
1370        MatScale_SeqBAIJ,
1371        0,
1372        0,
1373        0,
1374        MatGetBlockSize_SeqBAIJ,
1375        MatGetRowIJ_SeqBAIJ,
1376        MatRestoreRowIJ_SeqBAIJ,
1377        0,
1378        0,
1379        0,
1380        0,
1381        0,
1382        0,
1383        MatSetValuesBlocked_SeqBAIJ,
1384        MatGetSubMatrix_SeqBAIJ,
1385        0,
1386        0,
1387        MatGetMaps_Petsc};
1388 
1389 EXTERN_C_BEGIN
1390 #undef __FUNC__
1391 #define __FUNC__ "MatStoreValues_SeqBAIJ"
1392 int MatStoreValues_SeqBAIJ(Mat mat)
1393 {
1394   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1395   int         nz = aij->i[aij->m]*aij->bs*aij->bs2;
1396   int         ierr;
1397 
1398   PetscFunctionBegin;
1399   if (aij->nonew != 1) {
1400     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1401   }
1402 
1403   /* allocate space for values if not already there */
1404   if (!aij->saved_values) {
1405     aij->saved_values = (Scalar *) PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values);
1406   }
1407 
1408   /* copy values over */
1409   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1410   PetscFunctionReturn(0);
1411 }
1412 EXTERN_C_END
1413 
1414 EXTERN_C_BEGIN
1415 #undef __FUNC__
1416 #define __FUNC__ "MatRetrieveValues_SeqBAIJ"
1417 int MatRetrieveValues_SeqBAIJ(Mat mat)
1418 {
1419   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1420   int         nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr;
1421 
1422   PetscFunctionBegin;
1423   if (aij->nonew != 1) {
1424     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1425   }
1426   if (!aij->saved_values) {
1427     SETERRQ(1,1,"Must call MatStoreValues(A);first");
1428   }
1429 
1430   /* copy values over */
1431   ierr = PetscMemcpy(aij->a, aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1432   PetscFunctionReturn(0);
1433 }
1434 EXTERN_C_END
1435 
1436 #undef __FUNC__
1437 #define __FUNC__ "MatCreateSeqBAIJ"
1438 /*@C
1439    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1440    compressed row) format.  For good matrix assembly performance the
1441    user should preallocate the matrix storage by setting the parameter nz
1442    (or the array nnz).  By setting these parameters accurately, performance
1443    during matrix assembly can be increased by more than a factor of 50.
1444 
1445    Collective on MPI_Comm
1446 
1447    Input Parameters:
1448 +  comm - MPI communicator, set to PETSC_COMM_SELF
1449 .  bs - size of block
1450 .  m - number of rows
1451 .  n - number of columns
1452 .  nz - number of block nonzeros per block row (same for all rows)
1453 -  nnz - array containing the number of block nonzeros in the various block rows
1454          (possibly different for each block row) or PETSC_NULL
1455 
1456    Output Parameter:
1457 .  A - the matrix
1458 
1459    Options Database Keys:
1460 .   -mat_no_unroll - uses code that does not unroll the loops in the
1461                      block calculations (much slower)
1462 .    -mat_block_size - size of the blocks to use
1463 
1464    Level: intermediate
1465 
1466    Notes:
1467    The block AIJ format is fully compatible with standard Fortran 77
1468    storage.  That is, the stored row and column indices can begin at
1469    either one (as in Fortran) or zero.  See the users' manual for details.
1470 
1471    Specify the preallocated storage with either nz or nnz (not both).
1472    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1473    allocation.  For additional details, see the users manual chapter on
1474    matrices.
1475 
1476 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1477 @*/
1478 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1479 {
1480   Mat         B;
1481   Mat_SeqBAIJ *b;
1482   int         i,len,ierr,mbs,nbs,bs2,size;
1483   PetscTruth  flg;
1484 
1485   PetscFunctionBegin;
1486   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1487   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1");
1488 
1489   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1490   mbs  = m/bs;
1491   nbs  = n/bs;
1492   bs2  = bs*bs;
1493 
1494   if (mbs*bs!=m || nbs*bs!=n) {
1495     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize");
1496   }
1497 
1498   if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz);
1499   if (nnz) {
1500     for (i=0; i<mbs; i++) {
1501       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1502     }
1503   }
1504 
1505   *A = 0;
1506   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView);
1507   PLogObjectCreate(B);
1508   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b);
1509   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1510   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1511   ierr    = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1512   if (!flg) {
1513     switch (bs) {
1514     case 1:
1515       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1516       B->ops->solve           = MatSolve_SeqBAIJ_1;
1517       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_1;
1518       B->ops->mult            = MatMult_SeqBAIJ_1;
1519       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1520       break;
1521     case 2:
1522       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1523       B->ops->solve           = MatSolve_SeqBAIJ_2;
1524       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_2;
1525       B->ops->mult            = MatMult_SeqBAIJ_2;
1526       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1527       break;
1528     case 3:
1529       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1530       B->ops->solve           = MatSolve_SeqBAIJ_3;
1531       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_3;
1532       B->ops->mult            = MatMult_SeqBAIJ_3;
1533       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1534       break;
1535     case 4:
1536       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1537       B->ops->solve           = MatSolve_SeqBAIJ_4;
1538       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_4;
1539       B->ops->mult            = MatMult_SeqBAIJ_4;
1540       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1541       break;
1542     case 5:
1543       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1544       B->ops->solve           = MatSolve_SeqBAIJ_5;
1545       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_5;
1546       B->ops->mult            = MatMult_SeqBAIJ_5;
1547       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1548       break;
1549     case 6:
1550       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1551       B->ops->solve           = MatSolve_SeqBAIJ_6;
1552       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_6;
1553       B->ops->mult            = MatMult_SeqBAIJ_6;
1554       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1555       break;
1556     case 7:
1557       B->ops->mult            = MatMult_SeqBAIJ_7;
1558       B->ops->solve           = MatSolve_SeqBAIJ_7;
1559       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_7;
1560       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1561       break;
1562     }
1563   }
1564   B->ops->destroy     = MatDestroy_SeqBAIJ;
1565   B->ops->view        = MatView_SeqBAIJ;
1566   B->factor           = 0;
1567   B->lupivotthreshold = 1.0;
1568   B->mapping          = 0;
1569   b->row              = 0;
1570   b->col              = 0;
1571   b->icol             = 0;
1572   b->reallocs         = 0;
1573   b->saved_values     = 0;
1574 
1575   b->m       = m; B->m = m; B->M = m;
1576   b->n       = n; B->n = n; B->N = n;
1577 
1578   ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr);
1579   ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr);
1580 
1581   b->mbs     = mbs;
1582   b->nbs     = nbs;
1583   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(b->imax);
1584   if (!nnz) {
1585     if (nz == PETSC_DEFAULT) nz = 5;
1586     else if (nz <= 0)        nz = 1;
1587     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1588     nz = nz*mbs;
1589   } else {
1590     nz = 0;
1591     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1592   }
1593 
1594   /* allocate the matrix space */
1595   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int);
1596   b->a  = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a);
1597   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1598   b->j  = (int *) (b->a + nz*bs2);
1599   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1600   b->i  = b->j + nz;
1601   b->singlemalloc = PETSC_TRUE;
1602 
1603   b->i[0] = 0;
1604   for (i=1; i<mbs+1; i++) {
1605     b->i[i] = b->i[i-1] + b->imax[i-1];
1606   }
1607 
1608   /* b->ilen will count nonzeros in each block row so far. */
1609   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1610   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1611   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1612 
1613   b->bs               = bs;
1614   b->bs2              = bs2;
1615   b->mbs              = mbs;
1616   b->nz               = 0;
1617   b->maxnz            = nz*bs2;
1618   b->sorted           = PETSC_FALSE;
1619   b->roworiented      = PETSC_TRUE;
1620   b->nonew            = 0;
1621   b->diag             = 0;
1622   b->solve_work       = 0;
1623   b->mult_work        = 0;
1624   b->spptr            = 0;
1625   B->info.nz_unneeded = (double)b->maxnz;
1626   b->keepzeroedrows   = PETSC_FALSE;
1627 
1628   *A = B;
1629   ierr = OptionsHasName(PETSC_NULL,"-help", &flg);CHKERRQ(ierr);
1630   if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); }
1631 
1632   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1633                                      "MatStoreValues_SeqBAIJ",
1634                                      (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1635   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1636                                      "MatRetrieveValues_SeqBAIJ",
1637                                      (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1638   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1639                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1640                                      (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1641   PetscFunctionReturn(0);
1642 }
1643 
1644 #undef __FUNC__
1645 #define __FUNC__ "MatDuplicate_SeqBAIJ"
1646 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1647 {
1648   Mat         C;
1649   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1650   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1651 
1652   PetscFunctionBegin;
1653   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix");
1654 
1655   *B = 0;
1656   PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView);
1657   PLogObjectCreate(C);
1658   C->data           = (void *) (c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c);
1659   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1660   C->ops->destroy   = MatDestroy_SeqBAIJ;
1661   C->ops->view      = MatView_SeqBAIJ;
1662   C->factor         = A->factor;
1663   c->row            = 0;
1664   c->col            = 0;
1665   c->icol           = 0;
1666   c->saved_values   = 0;
1667   c->keepzeroedrows = a->keepzeroedrows;
1668   C->assembled      = PETSC_TRUE;
1669 
1670   c->m = C->m   = a->m;
1671   c->n = C->n   = a->n;
1672   C->M          = a->m;
1673   C->N          = a->n;
1674 
1675   c->bs         = a->bs;
1676   c->bs2        = a->bs2;
1677   c->mbs        = a->mbs;
1678   c->nbs        = a->nbs;
1679 
1680   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax);
1681   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen);
1682   for ( i=0; i<mbs; i++ ) {
1683     c->imax[i] = a->imax[i];
1684     c->ilen[i] = a->ilen[i];
1685   }
1686 
1687   /* allocate the matrix space */
1688   c->singlemalloc = PETSC_TRUE;
1689   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1690   c->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(c->a);
1691   c->j = (int *) (c->a + nz*bs2);
1692   c->i = c->j + nz;
1693   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1694   if (mbs > 0) {
1695     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1696     if (cpvalues == MAT_COPY_VALUES) {
1697       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1698     } else {
1699       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1700     }
1701   }
1702 
1703   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1704   c->sorted      = a->sorted;
1705   c->roworiented = a->roworiented;
1706   c->nonew       = a->nonew;
1707 
1708   if (a->diag) {
1709     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(c->diag);
1710     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1711     for ( i=0; i<mbs; i++ ) {
1712       c->diag[i] = a->diag[i];
1713     }
1714   } else c->diag        = 0;
1715   c->nz                 = a->nz;
1716   c->maxnz              = a->maxnz;
1717   c->solve_work         = 0;
1718   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1719   c->mult_work          = 0;
1720   *B = C;
1721   ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1722   PetscFunctionReturn(0);
1723 }
1724 
1725 #undef __FUNC__
1726 #define __FUNC__ "MatLoad_SeqBAIJ"
1727 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1728 {
1729   Mat_SeqBAIJ  *a;
1730   Mat          B;
1731   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1732   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1733   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1734   int          *masked, nmask,tmp,bs2,ishift;
1735   Scalar       *aa;
1736   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1737 
1738   PetscFunctionBegin;
1739   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1740   bs2  = bs*bs;
1741 
1742   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1743   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor");
1744   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1745   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1746   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object");
1747   M = header[1]; N = header[2]; nz = header[3];
1748 
1749   if (header[3] < 0) {
1750     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ");
1751   }
1752 
1753   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
1754 
1755   /*
1756      This code adds extra rows to make sure the number of rows is
1757     divisible by the blocksize
1758   */
1759   mbs        = M/bs;
1760   extra_rows = bs - M + bs*(mbs);
1761   if (extra_rows == bs) extra_rows = 0;
1762   else                  mbs++;
1763   if (extra_rows) {
1764     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1765   }
1766 
1767   /* read in row lengths */
1768   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1769   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1770   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1771 
1772   /* read in column indices */
1773   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) );CHKPTRQ(jj);
1774   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1775   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1776 
1777   /* loop over row lengths determining block row lengths */
1778   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1779   ierr        = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1780   mask        = (int *) PetscMalloc( 2*mbs*sizeof(int) );CHKPTRQ(mask);
1781   ierr        = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1782   masked      = mask + mbs;
1783   rowcount    = 0; nzcount = 0;
1784   for ( i=0; i<mbs; i++ ) {
1785     nmask = 0;
1786     for ( j=0; j<bs; j++ ) {
1787       kmax = rowlengths[rowcount];
1788       for ( k=0; k<kmax; k++ ) {
1789         tmp = jj[nzcount++]/bs;
1790         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1791       }
1792       rowcount++;
1793     }
1794     browlengths[i] += nmask;
1795     /* zero out the mask elements we set */
1796     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1797   }
1798 
1799   /* create our matrix */
1800   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1801   B = *A;
1802   a = (Mat_SeqBAIJ *) B->data;
1803 
1804   /* set matrix "i" values */
1805   a->i[0] = 0;
1806   for ( i=1; i<= mbs; i++ ) {
1807     a->i[i]      = a->i[i-1] + browlengths[i-1];
1808     a->ilen[i-1] = browlengths[i-1];
1809   }
1810   a->nz         = 0;
1811   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1812 
1813   /* read in nonzero values */
1814   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1815   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1816   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1817 
1818   /* set "a" and "j" values into matrix */
1819   nzcount = 0; jcount = 0;
1820   for ( i=0; i<mbs; i++ ) {
1821     nzcountb = nzcount;
1822     nmask    = 0;
1823     for ( j=0; j<bs; j++ ) {
1824       kmax = rowlengths[i*bs+j];
1825       for ( k=0; k<kmax; k++ ) {
1826         tmp = jj[nzcount++]/bs;
1827 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1828       }
1829       rowcount++;
1830     }
1831     /* sort the masked values */
1832     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1833 
1834     /* set "j" values into matrix */
1835     maskcount = 1;
1836     for ( j=0; j<nmask; j++ ) {
1837       a->j[jcount++]  = masked[j];
1838       mask[masked[j]] = maskcount++;
1839     }
1840     /* set "a" values into matrix */
1841     ishift = bs2*a->i[i];
1842     for ( j=0; j<bs; j++ ) {
1843       kmax = rowlengths[i*bs+j];
1844       for ( k=0; k<kmax; k++ ) {
1845         tmp       = jj[nzcountb]/bs ;
1846         block     = mask[tmp] - 1;
1847         point     = jj[nzcountb] - bs*tmp;
1848         idx       = ishift + bs2*block + j + bs*point;
1849         a->a[idx] = aa[nzcountb++];
1850       }
1851     }
1852     /* zero out the mask elements we set */
1853     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1854   }
1855   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix");
1856 
1857   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1858   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1859   ierr = PetscFree(aa);CHKERRQ(ierr);
1860   ierr = PetscFree(jj);CHKERRQ(ierr);
1861   ierr = PetscFree(mask);CHKERRQ(ierr);
1862 
1863   B->assembled = PETSC_TRUE;
1864 
1865   ierr = MatView_Private(B);CHKERRQ(ierr);
1866   PetscFunctionReturn(0);
1867 }
1868 
1869 
1870 
1871