xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 37b5b9611b07231c0c9c8e7355d420c04053c2bc)
1 /*$Id: baij.c,v 1.197 2000/01/26 21:50:39 bsmith Exp balay $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "sys.h"
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNC__
18 #define __FUNC__ "MatMissingDiagonal_SeqBAIJ"
19 int MatMissingDiagonal_SeqBAIJ(Mat A)
20 {
21   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) {
29       SETERRQ1(1,1,"Matrix is missing diagonal number %d",i);
30     }
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 #undef __FUNC__
36 #define __FUNC__ "MatMarkDiagonal_SeqBAIJ"
37 int MatMarkDiagonal_SeqBAIJ(Mat A)
38 {
39   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
40   int         i,j,*diag,m = a->mbs;
41 
42   PetscFunctionBegin;
43   if (a->diag) PetscFunctionReturn(0);
44 
45   diag = (int*)PetscMalloc((m+1)*sizeof(int));CHKPTRQ(diag);
46   PLogObjectMemory(A,(m+1)*sizeof(int));
47   for (i=0; i<m; i++) {
48     diag[i] = a->i[i+1];
49     for (j=a->i[i]; j<a->i[i+1]; j++) {
50       if (a->j[j] == i) {
51         diag[i] = j;
52         break;
53       }
54     }
55   }
56   a->diag = diag;
57   PetscFunctionReturn(0);
58 }
59 
60 
61 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
62 
63 #undef __FUNC__
64 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
65 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
66 {
67   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
68   int         ierr,n = a->mbs,i;
69 
70   PetscFunctionBegin;
71   *nn = n;
72   if (!ia) PetscFunctionReturn(0);
73   if (symmetric) {
74     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
75   } else if (oshift == 1) {
76     /* temporarily add 1 to i and j indices */
77     int nz = a->i[n] + 1;
78     for (i=0; i<nz; i++) a->j[i]++;
79     for (i=0; i<n+1; i++) a->i[i]++;
80     *ia = a->i; *ja = a->j;
81   } else {
82     *ia = a->i; *ja = a->j;
83   }
84 
85   PetscFunctionReturn(0);
86 }
87 
88 #undef __FUNC__
89 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
90 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
91                                 PetscTruth *done)
92 {
93   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
94   int         i,n = a->mbs,ierr;
95 
96   PetscFunctionBegin;
97   if (!ia) PetscFunctionReturn(0);
98   if (symmetric) {
99     ierr = PetscFree(*ia);CHKERRQ(ierr);
100     ierr = PetscFree(*ja);CHKERRQ(ierr);
101   } else if (oshift == 1) {
102     int nz = a->i[n];
103     for (i=0; i<nz; i++) a->j[i]--;
104     for (i=0; i<n+1; i++) a->i[i]--;
105   }
106   PetscFunctionReturn(0);
107 }
108 
109 #undef __FUNC__
110 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
111 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
112 {
113   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
114 
115   PetscFunctionBegin;
116   *bs = baij->bs;
117   PetscFunctionReturn(0);
118 }
119 
120 
121 #undef __FUNC__
122 #define __FUNC__ "MatDestroy_SeqBAIJ"
123 int MatDestroy_SeqBAIJ(Mat A)
124 {
125   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
126   int         ierr;
127 
128   PetscFunctionBegin;
129   if (A->mapping) {
130     ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr);
131   }
132   if (A->bmapping) {
133     ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr);
134   }
135   if (A->rmap) {
136     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
137   }
138   if (A->cmap) {
139     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
140   }
141 #if defined(PETSC_USE_LOG)
142   PLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
143 #endif
144   ierr = PetscFree(a->a);CHKERRQ(ierr);
145   if (!a->singlemalloc) {
146     ierr = PetscFree(a->i);CHKERRQ(ierr);
147     ierr = PetscFree(a->j);CHKERRQ(ierr);
148   }
149   if (a->row) {
150     ierr = ISDestroy(a->row);CHKERRQ(ierr);
151   }
152   if (a->col) {
153     ierr = ISDestroy(a->col);CHKERRQ(ierr);
154   }
155   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
156   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
157   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
158   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
159   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
160   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
161   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
162   ierr = PetscFree(a);CHKERRQ(ierr);
163   PLogObjectDestroy(A);
164   PetscHeaderDestroy(A);
165   PetscFunctionReturn(0);
166 }
167 
168 #undef __FUNC__
169 #define __FUNC__ "MatSetOption_SeqBAIJ"
170 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
171 {
172   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
173 
174   PetscFunctionBegin;
175   if      (op == MAT_ROW_ORIENTED)                 a->roworiented    = PETSC_TRUE;
176   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented    = PETSC_FALSE;
177   else if (op == MAT_COLUMNS_SORTED)               a->sorted         = PETSC_TRUE;
178   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted         = PETSC_FALSE;
179   else if (op == MAT_KEEP_ZEROED_ROWS)             a->keepzeroedrows = PETSC_TRUE;
180   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew          = 1;
181   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew          = -1;
182   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew          = -2;
183   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew          = 0;
184   else if (op == MAT_ROWS_SORTED ||
185            op == MAT_ROWS_UNSORTED ||
186            op == MAT_SYMMETRIC ||
187            op == MAT_STRUCTURALLY_SYMMETRIC ||
188            op == MAT_YES_NEW_DIAGONALS ||
189            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
190            op == MAT_USE_HASH_TABLE) {
191     PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
192   } else if (op == MAT_NO_NEW_DIAGONALS) {
193     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
194   } else {
195     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
196   }
197   PetscFunctionReturn(0);
198 }
199 
200 
201 #undef __FUNC__
202 #define __FUNC__ "MatGetSize_SeqBAIJ"
203 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
204 {
205   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
206 
207   PetscFunctionBegin;
208   if (m) *m = a->m;
209   if (n) *n = a->n;
210   PetscFunctionReturn(0);
211 }
212 
213 #undef __FUNC__
214 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
215 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
216 {
217   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
218 
219   PetscFunctionBegin;
220   *m = 0; *n = a->m;
221   PetscFunctionReturn(0);
222 }
223 
224 #undef __FUNC__
225 #define __FUNC__ "MatGetRow_SeqBAIJ"
226 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
227 {
228   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
229   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
230   MatScalar    *aa,*aa_i;
231   Scalar       *v_i;
232 
233   PetscFunctionBegin;
234   bs  = a->bs;
235   ai  = a->i;
236   aj  = a->j;
237   aa  = a->a;
238   bs2 = a->bs2;
239 
240   if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range");
241 
242   bn  = row/bs;   /* Block number */
243   bp  = row % bs; /* Block Position */
244   M   = ai[bn+1] - ai[bn];
245   *nz = bs*M;
246 
247   if (v) {
248     *v = 0;
249     if (*nz) {
250       *v = (Scalar*)PetscMalloc((*nz)*sizeof(Scalar));CHKPTRQ(*v);
251       for (i=0; i<M; i++) { /* for each block in the block row */
252         v_i  = *v + i*bs;
253         aa_i = aa + bs2*(ai[bn] + i);
254         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
255       }
256     }
257   }
258 
259   if (idx) {
260     *idx = 0;
261     if (*nz) {
262       *idx = (int*)PetscMalloc((*nz)*sizeof(int));CHKPTRQ(*idx);
263       for (i=0; i<M; i++) { /* for each block in the block row */
264         idx_i = *idx + i*bs;
265         itmp  = bs*aj[ai[bn] + i];
266         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
267       }
268     }
269   }
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNC__
274 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
275 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
276 {
277   int ierr;
278 
279   PetscFunctionBegin;
280   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
281   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
282   PetscFunctionReturn(0);
283 }
284 
285 #undef __FUNC__
286 #define __FUNC__ "MatTranspose_SeqBAIJ"
287 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
288 {
289   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
290   Mat         C;
291   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
292   int         *rows,*cols,bs2=a->bs2,refct;
293   MatScalar   *array = a->a;
294 
295   PetscFunctionBegin;
296   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place");
297   col  = (int*)PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col);
298   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
299 
300   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
301   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
302   ierr = PetscFree(col);CHKERRQ(ierr);
303   rows = (int*)PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows);
304   cols = rows + bs;
305   for (i=0; i<mbs; i++) {
306     cols[0] = i*bs;
307     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
308     len = ai[i+1] - ai[i];
309     for (j=0; j<len; j++) {
310       rows[0] = (*aj++)*bs;
311       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
312       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
313       array += bs2;
314     }
315   }
316   ierr = PetscFree(rows);CHKERRQ(ierr);
317 
318   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
319   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320 
321   if (B) {
322     *B = C;
323   } else {
324     PetscOps *Abops;
325     MatOps   Aops;
326 
327     /* This isn't really an in-place transpose */
328     ierr = PetscFree(a->a);CHKERRQ(ierr);
329     if (!a->singlemalloc) {
330       ierr = PetscFree(a->i);CHKERRQ(ierr);
331       ierr = PetscFree(a->j);CHKERRQ(ierr);
332     }
333     if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
334     if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
335     if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
336     if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
337     ierr = PetscFree(a);CHKERRQ(ierr);
338 
339 
340     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
341     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
342 
343     /*
344        This is horrible, horrible code. We need to keep the
345       A pointers for the bops and ops but copy everything
346       else from C.
347     */
348     Abops    = A->bops;
349     Aops     = A->ops;
350     refct    = A->refct;
351     ierr     = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr);
352     A->bops  = Abops;
353     A->ops   = Aops;
354     A->qlist = 0;
355     A->refct = refct;
356 
357     PetscHeaderDestroy(C);
358   }
359   PetscFunctionReturn(0);
360 }
361 
362 #undef __FUNC__
363 #define __FUNC__ "MatView_SeqBAIJ_Binary"
364 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
365 {
366   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
367   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
368   Scalar      *aa;
369   FILE        *file;
370 
371   PetscFunctionBegin;
372   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
373   col_lens = (int*)PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
374   col_lens[0] = MAT_COOKIE;
375 
376   col_lens[1] = a->m;
377   col_lens[2] = a->n;
378   col_lens[3] = a->nz*bs2;
379 
380   /* store lengths of each row and write (including header) to file */
381   count = 0;
382   for (i=0; i<a->mbs; i++) {
383     for (j=0; j<bs; j++) {
384       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
385     }
386   }
387   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr);
388   ierr = PetscFree(col_lens);CHKERRQ(ierr);
389 
390   /* store column indices (zero start index) */
391   jj = (int*)PetscMalloc((a->nz+1)*bs2*sizeof(int));CHKPTRQ(jj);
392   count = 0;
393   for (i=0; i<a->mbs; i++) {
394     for (j=0; j<bs; j++) {
395       for (k=a->i[i]; k<a->i[i+1]; k++) {
396         for (l=0; l<bs; l++) {
397           jj[count++] = bs*a->j[k] + l;
398         }
399       }
400     }
401   }
402   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
403   ierr = PetscFree(jj);CHKERRQ(ierr);
404 
405   /* store nonzero values */
406   aa = (Scalar*)PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa);
407   count = 0;
408   for (i=0; i<a->mbs; i++) {
409     for (j=0; j<bs; j++) {
410       for (k=a->i[i]; k<a->i[i+1]; k++) {
411         for (l=0; l<bs; l++) {
412           aa[count++] = a->a[bs2*k + l*bs + j];
413         }
414       }
415     }
416   }
417   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
418   ierr = PetscFree(aa);CHKERRQ(ierr);
419 
420   ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
421   if (file) {
422     fprintf(file,"-matload_block_size %d\n",a->bs);
423   }
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNC__
428 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
429 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
430 {
431   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
432   int         ierr,i,j,format,bs = a->bs,k,l,bs2=a->bs2;
433   char        *outputname;
434 
435   PetscFunctionBegin;
436   ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr);
437   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
438   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
439     ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
440   } else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
441     SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported");
442   } else if (format == VIEWER_FORMAT_ASCII_COMMON) {
443     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
444     for (i=0; i<a->mbs; i++) {
445       for (j=0; j<bs; j++) {
446         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
447         for (k=a->i[i]; k<a->i[i+1]; k++) {
448           for (l=0; l<bs; l++) {
449 #if defined(PETSC_USE_COMPLEX)
450             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
451               ierr = ViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l,
452                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
453             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
454               ierr = ViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l,
455                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
456             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
457               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
458             }
459 #else
460             if (a->a[bs2*k + l*bs + j] != 0.0) {
461               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
462             }
463 #endif
464           }
465         }
466         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
467       }
468     }
469     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
470   } else {
471     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
472     for (i=0; i<a->mbs; i++) {
473       for (j=0; j<bs; j++) {
474         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
475         for (k=a->i[i]; k<a->i[i+1]; k++) {
476           for (l=0; l<bs; l++) {
477 #if defined(PETSC_USE_COMPLEX)
478             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
479               ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
480                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
481             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
482               ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
483                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
484             } else {
485               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
486             }
487 #else
488             ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
489 #endif
490           }
491         }
492         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
493       }
494     }
495     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
496   }
497   ierr = ViewerFlush(viewer);CHKERRQ(ierr);
498   PetscFunctionReturn(0);
499 }
500 
501 #undef __FUNC__
502 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom"
503 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa)
504 {
505   Mat          A = (Mat) Aa;
506   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
507   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
508   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
509   MatScalar    *aa;
510   Viewer       viewer;
511 
512   PetscFunctionBegin;
513 
514   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
515   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
516 
517   ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
518 
519   /* loop over matrix elements drawing boxes */
520   color = DRAW_BLUE;
521   for (i=0,row=0; i<mbs; i++,row+=bs) {
522     for (j=a->i[i]; j<a->i[i+1]; j++) {
523       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
524       x_l = a->j[j]*bs; x_r = x_l + 1.0;
525       aa = a->a + j*bs2;
526       for (k=0; k<bs; k++) {
527         for (l=0; l<bs; l++) {
528           if (PetscRealPart(*aa++) >=  0.) continue;
529           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
530         }
531       }
532     }
533   }
534   color = DRAW_CYAN;
535   for (i=0,row=0; i<mbs; i++,row+=bs) {
536     for (j=a->i[i]; j<a->i[i+1]; j++) {
537       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
538       x_l = a->j[j]*bs; x_r = x_l + 1.0;
539       aa = a->a + j*bs2;
540       for (k=0; k<bs; k++) {
541         for (l=0; l<bs; l++) {
542           if (PetscRealPart(*aa++) != 0.) continue;
543           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
544         }
545       }
546     }
547   }
548 
549   color = DRAW_RED;
550   for (i=0,row=0; i<mbs; i++,row+=bs) {
551     for (j=a->i[i]; j<a->i[i+1]; j++) {
552       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
553       x_l = a->j[j]*bs; x_r = x_l + 1.0;
554       aa = a->a + j*bs2;
555       for (k=0; k<bs; k++) {
556         for (l=0; l<bs; l++) {
557           if (PetscRealPart(*aa++) <= 0.) continue;
558           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
559         }
560       }
561     }
562   }
563   PetscFunctionReturn(0);
564 }
565 
566 #undef __FUNC__
567 #define __FUNC__ "MatView_SeqBAIJ_Draw"
568 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
569 {
570   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
571   int          ierr;
572   PetscReal    xl,yl,xr,yr,w,h;
573   Draw         draw;
574   PetscTruth   isnull;
575 
576   PetscFunctionBegin;
577 
578   ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
579   ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
580 
581   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
582   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
583   xr += w;    yr += h;  xl = -w;     yl = -h;
584   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
585   ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
586   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNC__
591 #define __FUNC__ "MatView_SeqBAIJ"
592 int MatView_SeqBAIJ(Mat A,Viewer viewer)
593 {
594   int        ierr;
595   PetscTruth issocket,isascii,isbinary,isdraw;
596 
597   PetscFunctionBegin;
598   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
599   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
600   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
601   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
602   if (issocket) {
603     SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported");
604   } else if (isascii){
605     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
606   } else if (isbinary) {
607     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
608   } else if (isdraw) {
609     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
610   } else {
611     SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
612   }
613   PetscFunctionReturn(0);
614 }
615 
616 
617 #undef __FUNC__
618 #define __FUNC__ "MatGetValues_SeqBAIJ"
619 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
620 {
621   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
622   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
623   int        *ai = a->i,*ailen = a->ilen;
624   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
625   MatScalar  *ap,*aa = a->a,zero = 0.0;
626 
627   PetscFunctionBegin;
628   for (k=0; k<m; k++) { /* loop over rows */
629     row  = im[k]; brow = row/bs;
630     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
631     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
632     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
633     nrow = ailen[brow];
634     for (l=0; l<n; l++) { /* loop over columns */
635       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
636       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
637       col  = in[l] ;
638       bcol = col/bs;
639       cidx = col%bs;
640       ridx = row%bs;
641       high = nrow;
642       low  = 0; /* assume unsorted */
643       while (high-low > 5) {
644         t = (low+high)/2;
645         if (rp[t] > bcol) high = t;
646         else             low  = t;
647       }
648       for (i=low; i<high; i++) {
649         if (rp[i] > bcol) break;
650         if (rp[i] == bcol) {
651           *v++ = ap[bs2*i+bs*cidx+ridx];
652           goto finished;
653         }
654       }
655       *v++ = zero;
656       finished:;
657     }
658   }
659   PetscFunctionReturn(0);
660 }
661 
662 
663 #undef __FUNC__
664 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
665 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
666 {
667   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
668   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
669   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
670   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
671   Scalar      *value = v;
672   MatScalar   *ap,*aa=a->a,*bap;
673 
674   PetscFunctionBegin;
675   if (roworiented) {
676     stepval = (n-1)*bs;
677   } else {
678     stepval = (m-1)*bs;
679   }
680   for (k=0; k<m; k++) { /* loop over added rows */
681     row  = im[k];
682     if (row < 0) continue;
683 #if defined(PETSC_USE_BOPT_g)
684     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
685 #endif
686     rp   = aj + ai[row];
687     ap   = aa + bs2*ai[row];
688     rmax = imax[row];
689     nrow = ailen[row];
690     low  = 0;
691     for (l=0; l<n; l++) { /* loop over added columns */
692       if (in[l] < 0) continue;
693 #if defined(PETSC_USE_BOPT_g)
694       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
695 #endif
696       col = in[l];
697       if (roworiented) {
698         value = v + k*(stepval+bs)*bs + l*bs;
699       } else {
700         value = v + l*(stepval+bs)*bs + k*bs;
701       }
702       if (!sorted) low = 0; high = nrow;
703       while (high-low > 7) {
704         t = (low+high)/2;
705         if (rp[t] > col) high = t;
706         else             low  = t;
707       }
708       for (i=low; i<high; i++) {
709         if (rp[i] > col) break;
710         if (rp[i] == col) {
711           bap  = ap +  bs2*i;
712           if (roworiented) {
713             if (is == ADD_VALUES) {
714               for (ii=0; ii<bs; ii++,value+=stepval) {
715                 for (jj=ii; jj<bs2; jj+=bs) {
716                   bap[jj] += *value++;
717                 }
718               }
719             } else {
720               for (ii=0; ii<bs; ii++,value+=stepval) {
721                 for (jj=ii; jj<bs2; jj+=bs) {
722                   bap[jj] = *value++;
723                 }
724               }
725             }
726           } else {
727             if (is == ADD_VALUES) {
728               for (ii=0; ii<bs; ii++,value+=stepval) {
729                 for (jj=0; jj<bs; jj++) {
730                   *bap++ += *value++;
731                 }
732               }
733             } else {
734               for (ii=0; ii<bs; ii++,value+=stepval) {
735                 for (jj=0; jj<bs; jj++) {
736                   *bap++  = *value++;
737                 }
738               }
739             }
740           }
741           goto noinsert2;
742         }
743       }
744       if (nonew == 1) goto noinsert2;
745       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
746       if (nrow >= rmax) {
747         /* there is no extra room in row, therefore enlarge */
748         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
749         MatScalar *new_a;
750 
751         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
752 
753         /* malloc new storage space */
754         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
755         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a);
756         new_j   = (int*)(new_a + bs2*new_nz);
757         new_i   = new_j + new_nz;
758 
759         /* copy over old data into new slots */
760         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
761         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
762         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
763         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
764         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
765         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
766         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
767         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
768         /* free up old matrix storage */
769         ierr = PetscFree(a->a);CHKERRQ(ierr);
770         if (!a->singlemalloc) {
771           ierr = PetscFree(a->i);CHKERRQ(ierr);
772           ierr = PetscFree(a->j);CHKERRQ(ierr);
773         }
774         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
775         a->singlemalloc = PETSC_TRUE;
776 
777         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
778         rmax = imax[row] = imax[row] + CHUNKSIZE;
779         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
780         a->maxnz += bs2*CHUNKSIZE;
781         a->reallocs++;
782         a->nz++;
783       }
784       N = nrow++ - 1;
785       /* shift up all the later entries in this row */
786       for (ii=N; ii>=i; ii--) {
787         rp[ii+1] = rp[ii];
788         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
789       }
790       if (N >= i) {
791         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
792       }
793       rp[i] = col;
794       bap   = ap +  bs2*i;
795       if (roworiented) {
796         for (ii=0; ii<bs; ii++,value+=stepval) {
797           for (jj=ii; jj<bs2; jj+=bs) {
798             bap[jj] = *value++;
799           }
800         }
801       } else {
802         for (ii=0; ii<bs; ii++,value+=stepval) {
803           for (jj=0; jj<bs; jj++) {
804             *bap++  = *value++;
805           }
806         }
807       }
808       noinsert2:;
809       low = i;
810     }
811     ailen[row] = nrow;
812   }
813   PetscFunctionReturn(0);
814 }
815 
816 
817 #undef __FUNC__
818 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
819 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
820 {
821   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
822   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
823   int        m = a->m,*ip,N,*ailen = a->ilen;
824   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
825   MatScalar  *aa = a->a,*ap;
826 
827   PetscFunctionBegin;
828   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
829 
830   if (m) rmax = ailen[0];
831   for (i=1; i<mbs; i++) {
832     /* move each row back by the amount of empty slots (fshift) before it*/
833     fshift += imax[i-1] - ailen[i-1];
834     rmax   = PetscMax(rmax,ailen[i]);
835     if (fshift) {
836       ip = aj + ai[i]; ap = aa + bs2*ai[i];
837       N = ailen[i];
838       for (j=0; j<N; j++) {
839         ip[j-fshift] = ip[j];
840         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
841       }
842     }
843     ai[i] = ai[i-1] + ailen[i-1];
844   }
845   if (mbs) {
846     fshift += imax[mbs-1] - ailen[mbs-1];
847     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
848   }
849   /* reset ilen and imax for each row */
850   for (i=0; i<mbs; i++) {
851     ailen[i] = imax[i] = ai[i+1] - ai[i];
852   }
853   a->nz = ai[mbs];
854 
855   /* diagonals may have moved, so kill the diagonal pointers */
856   if (fshift && a->diag) {
857     ierr = PetscFree(a->diag);CHKERRQ(ierr);
858     PLogObjectMemory(A,-(m+1)*sizeof(int));
859     a->diag = 0;
860   }
861   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
862            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
863   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
864            a->reallocs);
865   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
866   a->reallocs          = 0;
867   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
868 
869   PetscFunctionReturn(0);
870 }
871 
872 
873 
874 /*
875    This function returns an array of flags which indicate the locations of contiguous
876    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
877    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
878    Assume: sizes should be long enough to hold all the values.
879 */
880 #undef __FUNC__
881 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks"
882 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
883 {
884   int        i,j,k,row;
885   PetscTruth flg;
886 
887   PetscFunctionBegin;
888   for (i=0,j=0; i<n; j++) {
889     row = idx[i];
890     if (row%bs!=0) { /* Not the begining of a block */
891       sizes[j] = 1;
892       i++;
893     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
894       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
895       i++;
896     } else { /* Begining of the block, so check if the complete block exists */
897       flg = PETSC_TRUE;
898       for (k=1; k<bs; k++) {
899         if (row+k != idx[i+k]) { /* break in the block */
900           flg = PETSC_FALSE;
901           break;
902         }
903       }
904       if (flg == PETSC_TRUE) { /* No break in the bs */
905         sizes[j] = bs;
906         i+= bs;
907       } else {
908         sizes[j] = 1;
909         i++;
910       }
911     }
912   }
913   *bs_max = j;
914   PetscFunctionReturn(0);
915 }
916 
917 #undef __FUNC__
918 #define __FUNC__ "MatZeroRows_SeqBAIJ"
919 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag)
920 {
921   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
922   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
923   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
924   Scalar      zero = 0.0;
925   MatScalar   *aa;
926 
927   PetscFunctionBegin;
928   /* Make a copy of the IS and  sort it */
929   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
930   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
931 
932   /* allocate memory for rows,sizes */
933   rows  = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows);
934   sizes = rows + is_n;
935 
936   /* initialize copy IS valurs to rows, and sort them */
937   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
938   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
939   if (baij->keepzeroedrows) {
940     for (i=0; i<is_n; i++) { sizes[i] = 1; }
941     bs_max = is_n;
942   } else {
943     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
944   }
945   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
946 
947   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
948     row   = rows[j];
949     if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row);
950     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
951     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
952     if (sizes[i] == bs && !baij->keepzeroedrows) {
953       if (diag) {
954         if (baij->ilen[row/bs] > 0) {
955           baij->ilen[row/bs] = 1;
956           baij->j[baij->i[row/bs]] = row/bs;
957           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
958         }
959         /* Now insert all the diagoanl values for this bs */
960         for (k=0; k<bs; k++) {
961           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
962         }
963       } else { /* (!diag) */
964         baij->ilen[row/bs] = 0;
965       } /* end (!diag) */
966     } else { /* (sizes[i] != bs) */
967 #if defined (PETSC_USE_DEBUG)
968       if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1");
969 #endif
970       for (k=0; k<count; k++) {
971         aa[0] = zero;
972         aa+=bs;
973       }
974       if (diag) {
975         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
976       }
977     }
978   }
979 
980   ierr = PetscFree(rows);CHKERRQ(ierr);
981   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
982   PetscFunctionReturn(0);
983 }
984 
985 #undef __FUNC__
986 #define __FUNC__ "MatSetValues_SeqBAIJ"
987 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
988 {
989   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
990   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
991   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
992   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
993   int         ridx,cidx,bs2=a->bs2,ierr;
994   MatScalar   *ap,value,*aa=a->a,*bap;
995 
996   PetscFunctionBegin;
997   for (k=0; k<m; k++) { /* loop over added rows */
998     row  = im[k]; brow = row/bs;
999     if (row < 0) continue;
1000 #if defined(PETSC_USE_BOPT_g)
1001     if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m);
1002 #endif
1003     rp   = aj + ai[brow];
1004     ap   = aa + bs2*ai[brow];
1005     rmax = imax[brow];
1006     nrow = ailen[brow];
1007     low  = 0;
1008     for (l=0; l<n; l++) { /* loop over added columns */
1009       if (in[l] < 0) continue;
1010 #if defined(PETSC_USE_BOPT_g)
1011       if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n);
1012 #endif
1013       col = in[l]; bcol = col/bs;
1014       ridx = row % bs; cidx = col % bs;
1015       if (roworiented) {
1016         value = v[l + k*n];
1017       } else {
1018         value = v[k + l*m];
1019       }
1020       if (!sorted) low = 0; high = nrow;
1021       while (high-low > 7) {
1022         t = (low+high)/2;
1023         if (rp[t] > bcol) high = t;
1024         else              low  = t;
1025       }
1026       for (i=low; i<high; i++) {
1027         if (rp[i] > bcol) break;
1028         if (rp[i] == bcol) {
1029           bap  = ap +  bs2*i + bs*cidx + ridx;
1030           if (is == ADD_VALUES) *bap += value;
1031           else                  *bap  = value;
1032           goto noinsert1;
1033         }
1034       }
1035       if (nonew == 1) goto noinsert1;
1036       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1037       if (nrow >= rmax) {
1038         /* there is no extra room in row, therefore enlarge */
1039         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1040         MatScalar *new_a;
1041 
1042         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1043 
1044         /* Malloc new storage space */
1045         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1046         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a);
1047         new_j   = (int*)(new_a + bs2*new_nz);
1048         new_i   = new_j + new_nz;
1049 
1050         /* copy over old data into new slots */
1051         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1052         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1053         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1054         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1055         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1056         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1057         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1058         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1059         /* free up old matrix storage */
1060         ierr = PetscFree(a->a);CHKERRQ(ierr);
1061         if (!a->singlemalloc) {
1062           ierr = PetscFree(a->i);CHKERRQ(ierr);
1063           ierr = PetscFree(a->j);CHKERRQ(ierr);
1064         }
1065         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1066         a->singlemalloc = PETSC_TRUE;
1067 
1068         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1069         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1070         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1071         a->maxnz += bs2*CHUNKSIZE;
1072         a->reallocs++;
1073         a->nz++;
1074       }
1075       N = nrow++ - 1;
1076       /* shift up all the later entries in this row */
1077       for (ii=N; ii>=i; ii--) {
1078         rp[ii+1] = rp[ii];
1079         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1080       }
1081       if (N>=i) {
1082         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1083       }
1084       rp[i]                      = bcol;
1085       ap[bs2*i + bs*cidx + ridx] = value;
1086       noinsert1:;
1087       low = i;
1088     }
1089     ailen[brow] = nrow;
1090   }
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,PetscReal,Mat*);
1095 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,PetscReal);
1096 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1097 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1098 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1099 extern int MatMultTranspose_SeqBAIJ(Mat,Vec,Vec);
1100 extern int MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1101 extern int MatScale_SeqBAIJ(Scalar*,Mat);
1102 extern int MatNorm_SeqBAIJ(Mat,NormType,PetscReal *);
1103 extern int MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*);
1104 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1105 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1106 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1107 extern int MatZeroEntries_SeqBAIJ(Mat);
1108 
1109 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1110 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1111 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1112 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1113 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1114 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1115 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
1116 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1117 extern int MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec);
1118 extern int MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec);
1119 extern int MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec);
1120 extern int MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec);
1121 extern int MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec);
1122 extern int MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec);
1123 extern int MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec);
1124 
1125 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1126 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1127 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1128 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1129 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1130 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1131 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*);
1132 
1133 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1134 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1135 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1136 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1137 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1138 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec);
1139 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1140 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1141 
1142 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1143 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1144 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1145 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1146 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1147 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
1148 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1149 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1150 
1151 #undef __FUNC__
1152 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1153 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1154 {
1155   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1156   Mat         outA;
1157   int         ierr;
1158   PetscTruth  row_identity,col_identity;
1159 
1160   PetscFunctionBegin;
1161   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU");
1162   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1163   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1164   if (!row_identity || !col_identity) {
1165     SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU");
1166   }
1167 
1168   outA          = inA;
1169   inA->factor   = FACTOR_LU;
1170 
1171   if (!a->diag) {
1172     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1173   }
1174   /*
1175       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1176       for ILU(0) factorization with natural ordering
1177   */
1178   switch (a->bs) {
1179   case 1:
1180     inA->ops->solvetranspose   = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering;
1181     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1182   case 2:
1183     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1184     inA->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1185     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering;
1186     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1187     break;
1188   case 3:
1189     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1190     inA->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1191     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_3_NaturalOrdering;
1192     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1193     break;
1194   case 4:
1195     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1196     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1197     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_4_NaturalOrdering;
1198     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1199     break;
1200   case 5:
1201     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1202     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1203     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering;
1204     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1205     break;
1206   case 6:
1207     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1208     inA->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1209     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_6_NaturalOrdering;
1210     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1211     break;
1212   case 7:
1213     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1214     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering;
1215     inA->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1216     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1217     break;
1218   default:
1219     a->row        = row;
1220     a->col        = col;
1221     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1222     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1223 
1224     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1225     ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr);
1226     PLogObjectParent(inA,a->icol);
1227 
1228     if (!a->solve_work) {
1229       a->solve_work = (Scalar*)PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1230       PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1231     }
1232   }
1233 
1234   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1235 
1236   PetscFunctionReturn(0);
1237 }
1238 #undef __FUNC__
1239 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1240 int MatPrintHelp_SeqBAIJ(Mat A)
1241 {
1242   static PetscTruth called = PETSC_FALSE;
1243   MPI_Comm          comm = A->comm;
1244   int               ierr;
1245 
1246   PetscFunctionBegin;
1247   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1248   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1249   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 EXTERN_C_BEGIN
1254 #undef __FUNC__
1255 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1256 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1257 {
1258   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1259   int         i,nz,n;
1260 
1261   PetscFunctionBegin;
1262   nz = baij->maxnz;
1263   n  = baij->n;
1264   for (i=0; i<nz; i++) {
1265     baij->j[i] = indices[i];
1266   }
1267   baij->nz = nz;
1268   for (i=0; i<n; i++) {
1269     baij->ilen[i] = baij->imax[i];
1270   }
1271 
1272   PetscFunctionReturn(0);
1273 }
1274 EXTERN_C_END
1275 
1276 #undef __FUNC__
1277 #define __FUNC__ "MatSeqBAIJSetColumnIndices"
1278 /*@
1279     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1280        in the matrix.
1281 
1282   Input Parameters:
1283 +  mat - the SeqBAIJ matrix
1284 -  indices - the column indices
1285 
1286   Level: advanced
1287 
1288   Notes:
1289     This can be called if you have precomputed the nonzero structure of the
1290   matrix and want to provide it to the matrix object to improve the performance
1291   of the MatSetValues() operation.
1292 
1293     You MUST have set the correct numbers of nonzeros per row in the call to
1294   MatCreateSeqBAIJ().
1295 
1296     MUST be called before any calls to MatSetValues();
1297 
1298 @*/
1299 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1300 {
1301   int ierr,(*f)(Mat,int *);
1302 
1303   PetscFunctionBegin;
1304   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1305   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr);
1306   if (f) {
1307     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1308   } else {
1309     SETERRQ(1,1,"Wrong type of matrix to set column indices");
1310   }
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 /* -------------------------------------------------------------------*/
1315 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1316        MatGetRow_SeqBAIJ,
1317        MatRestoreRow_SeqBAIJ,
1318        MatMult_SeqBAIJ_N,
1319        MatMultAdd_SeqBAIJ_N,
1320        MatMultTranspose_SeqBAIJ,
1321        MatMultTransposeAdd_SeqBAIJ,
1322        MatSolve_SeqBAIJ_N,
1323        0,
1324        0,
1325        0,
1326        MatLUFactor_SeqBAIJ,
1327        0,
1328        0,
1329        MatTranspose_SeqBAIJ,
1330        MatGetInfo_SeqBAIJ,
1331        MatEqual_SeqBAIJ,
1332        MatGetDiagonal_SeqBAIJ,
1333        MatDiagonalScale_SeqBAIJ,
1334        MatNorm_SeqBAIJ,
1335        0,
1336        MatAssemblyEnd_SeqBAIJ,
1337        0,
1338        MatSetOption_SeqBAIJ,
1339        MatZeroEntries_SeqBAIJ,
1340        MatZeroRows_SeqBAIJ,
1341        MatLUFactorSymbolic_SeqBAIJ,
1342        MatLUFactorNumeric_SeqBAIJ_N,
1343        0,
1344        0,
1345        MatGetSize_SeqBAIJ,
1346        MatGetSize_SeqBAIJ,
1347        MatGetOwnershipRange_SeqBAIJ,
1348        MatILUFactorSymbolic_SeqBAIJ,
1349        0,
1350        0,
1351        0,
1352        MatDuplicate_SeqBAIJ,
1353        0,
1354        0,
1355        MatILUFactor_SeqBAIJ,
1356        0,
1357        0,
1358        MatGetSubMatrices_SeqBAIJ,
1359        MatIncreaseOverlap_SeqBAIJ,
1360        MatGetValues_SeqBAIJ,
1361        0,
1362        MatPrintHelp_SeqBAIJ,
1363        MatScale_SeqBAIJ,
1364        0,
1365        0,
1366        0,
1367        MatGetBlockSize_SeqBAIJ,
1368        MatGetRowIJ_SeqBAIJ,
1369        MatRestoreRowIJ_SeqBAIJ,
1370        0,
1371        0,
1372        0,
1373        0,
1374        0,
1375        0,
1376        MatSetValuesBlocked_SeqBAIJ,
1377        MatGetSubMatrix_SeqBAIJ,
1378        0,
1379        0,
1380        MatGetMaps_Petsc};
1381 
1382 EXTERN_C_BEGIN
1383 #undef __FUNC__
1384 #define __FUNC__ "MatStoreValues_SeqBAIJ"
1385 int MatStoreValues_SeqBAIJ(Mat mat)
1386 {
1387   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1388   int         nz = aij->i[aij->m]*aij->bs*aij->bs2;
1389   int         ierr;
1390 
1391   PetscFunctionBegin;
1392   if (aij->nonew != 1) {
1393     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1394   }
1395 
1396   /* allocate space for values if not already there */
1397   if (!aij->saved_values) {
1398     aij->saved_values = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values);
1399   }
1400 
1401   /* copy values over */
1402   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1403   PetscFunctionReturn(0);
1404 }
1405 EXTERN_C_END
1406 
1407 EXTERN_C_BEGIN
1408 #undef __FUNC__
1409 #define __FUNC__ "MatRetrieveValues_SeqBAIJ"
1410 int MatRetrieveValues_SeqBAIJ(Mat mat)
1411 {
1412   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1413   int         nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr;
1414 
1415   PetscFunctionBegin;
1416   if (aij->nonew != 1) {
1417     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1418   }
1419   if (!aij->saved_values) {
1420     SETERRQ(1,1,"Must call MatStoreValues(A);first");
1421   }
1422 
1423   /* copy values over */
1424   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 EXTERN_C_END
1428 
1429 #undef __FUNC__
1430 #define __FUNC__ "MatCreateSeqBAIJ"
1431 /*@C
1432    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1433    compressed row) format.  For good matrix assembly performance the
1434    user should preallocate the matrix storage by setting the parameter nz
1435    (or the array nnz).  By setting these parameters accurately, performance
1436    during matrix assembly can be increased by more than a factor of 50.
1437 
1438    Collective on MPI_Comm
1439 
1440    Input Parameters:
1441 +  comm - MPI communicator, set to PETSC_COMM_SELF
1442 .  bs - size of block
1443 .  m - number of rows
1444 .  n - number of columns
1445 .  nz - number of block nonzeros per block row (same for all rows)
1446 -  nnz - array containing the number of block nonzeros in the various block rows
1447          (possibly different for each block row) or PETSC_NULL
1448 
1449    Output Parameter:
1450 .  A - the matrix
1451 
1452    Options Database Keys:
1453 .   -mat_no_unroll - uses code that does not unroll the loops in the
1454                      block calculations (much slower)
1455 .    -mat_block_size - size of the blocks to use
1456 
1457    Level: intermediate
1458 
1459    Notes:
1460    The block AIJ format is fully compatible with standard Fortran 77
1461    storage.  That is, the stored row and column indices can begin at
1462    either one (as in Fortran) or zero.  See the users' manual for details.
1463 
1464    Specify the preallocated storage with either nz or nnz (not both).
1465    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1466    allocation.  For additional details, see the users manual chapter on
1467    matrices.
1468 
1469 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1470 @*/
1471 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1472 {
1473   Mat         B;
1474   Mat_SeqBAIJ *b;
1475   int         i,len,ierr,mbs,nbs,bs2,size;
1476   PetscTruth  flg;
1477 
1478   PetscFunctionBegin;
1479   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1480   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1");
1481 
1482   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1483   mbs  = m/bs;
1484   nbs  = n/bs;
1485   bs2  = bs*bs;
1486 
1487   if (mbs*bs!=m || nbs*bs!=n) {
1488     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize");
1489   }
1490 
1491   if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz);
1492   if (nnz) {
1493     for (i=0; i<mbs; i++) {
1494       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1495     }
1496   }
1497 
1498   *A = 0;
1499   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView);
1500   PLogObjectCreate(B);
1501   B->data = (void*)(b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b);
1502   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1503   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1504   ierr    = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1505   if (!flg) {
1506     switch (bs) {
1507     case 1:
1508       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1509       B->ops->solve           = MatSolve_SeqBAIJ_1;
1510       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_1;
1511       B->ops->mult            = MatMult_SeqBAIJ_1;
1512       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1513       break;
1514     case 2:
1515       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1516       B->ops->solve           = MatSolve_SeqBAIJ_2;
1517       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_2;
1518       B->ops->mult            = MatMult_SeqBAIJ_2;
1519       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1520       break;
1521     case 3:
1522       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1523       B->ops->solve           = MatSolve_SeqBAIJ_3;
1524       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_3;
1525       B->ops->mult            = MatMult_SeqBAIJ_3;
1526       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1527       break;
1528     case 4:
1529       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1530       B->ops->solve           = MatSolve_SeqBAIJ_4;
1531       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_4;
1532       B->ops->mult            = MatMult_SeqBAIJ_4;
1533       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1534       break;
1535     case 5:
1536       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1537       B->ops->solve           = MatSolve_SeqBAIJ_5;
1538       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_5;
1539       B->ops->mult            = MatMult_SeqBAIJ_5;
1540       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1541       break;
1542     case 6:
1543       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1544       B->ops->solve           = MatSolve_SeqBAIJ_6;
1545       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_6;
1546       B->ops->mult            = MatMult_SeqBAIJ_6;
1547       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1548       break;
1549     case 7:
1550       B->ops->mult            = MatMult_SeqBAIJ_7;
1551       B->ops->solve           = MatSolve_SeqBAIJ_7;
1552       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_7;
1553       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1554       break;
1555     }
1556   }
1557   B->ops->destroy     = MatDestroy_SeqBAIJ;
1558   B->ops->view        = MatView_SeqBAIJ;
1559   B->factor           = 0;
1560   B->lupivotthreshold = 1.0;
1561   B->mapping          = 0;
1562   b->row              = 0;
1563   b->col              = 0;
1564   b->icol             = 0;
1565   b->reallocs         = 0;
1566   b->saved_values     = 0;
1567 
1568   b->m       = m; B->m = m; B->M = m;
1569   b->n       = n; B->n = n; B->N = n;
1570 
1571   ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr);
1572   ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr);
1573 
1574   b->mbs     = mbs;
1575   b->nbs     = nbs;
1576   b->imax    = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(b->imax);
1577   if (!nnz) {
1578     if (nz == PETSC_DEFAULT) nz = 5;
1579     else if (nz <= 0)        nz = 1;
1580     for (i=0; i<mbs; i++) b->imax[i] = nz;
1581     nz = nz*mbs;
1582   } else {
1583     nz = 0;
1584     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1585   }
1586 
1587   /* allocate the matrix space */
1588   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int);
1589   b->a  = (MatScalar*)PetscMalloc(len);CHKPTRQ(b->a);
1590   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1591   b->j  = (int*)(b->a + nz*bs2);
1592   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1593   b->i  = b->j + nz;
1594   b->singlemalloc = PETSC_TRUE;
1595 
1596   b->i[0] = 0;
1597   for (i=1; i<mbs+1; i++) {
1598     b->i[i] = b->i[i-1] + b->imax[i-1];
1599   }
1600 
1601   /* b->ilen will count nonzeros in each block row so far. */
1602   b->ilen = (int*)PetscMalloc((mbs+1)*sizeof(int));
1603   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1604   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1605 
1606   b->bs               = bs;
1607   b->bs2              = bs2;
1608   b->mbs              = mbs;
1609   b->nz               = 0;
1610   b->maxnz            = nz*bs2;
1611   b->sorted           = PETSC_FALSE;
1612   b->roworiented      = PETSC_TRUE;
1613   b->nonew            = 0;
1614   b->diag             = 0;
1615   b->solve_work       = 0;
1616   b->mult_work        = 0;
1617   b->spptr            = 0;
1618   B->info.nz_unneeded = (PetscReal)b->maxnz;
1619   b->keepzeroedrows   = PETSC_FALSE;
1620 
1621   *A = B;
1622   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1623   if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); }
1624 
1625   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1626                                      "MatStoreValues_SeqBAIJ",
1627                                      (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1628   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1629                                      "MatRetrieveValues_SeqBAIJ",
1630                                      (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1631   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1632                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1633                                      (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1634   PetscFunctionReturn(0);
1635 }
1636 
1637 #undef __FUNC__
1638 #define __FUNC__ "MatDuplicate_SeqBAIJ"
1639 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1640 {
1641   Mat         C;
1642   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1643   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1644 
1645   PetscFunctionBegin;
1646   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix");
1647 
1648   *B = 0;
1649   PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView);
1650   PLogObjectCreate(C);
1651   C->data           = (void*)(c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c);
1652   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1653   C->ops->destroy   = MatDestroy_SeqBAIJ;
1654   C->ops->view      = MatView_SeqBAIJ;
1655   C->factor         = A->factor;
1656   c->row            = 0;
1657   c->col            = 0;
1658   c->icol           = 0;
1659   c->saved_values   = 0;
1660   c->keepzeroedrows = a->keepzeroedrows;
1661   C->assembled      = PETSC_TRUE;
1662 
1663   c->m = C->m   = a->m;
1664   c->n = C->n   = a->n;
1665   C->M          = a->m;
1666   C->N          = a->n;
1667 
1668   c->bs         = a->bs;
1669   c->bs2        = a->bs2;
1670   c->mbs        = a->mbs;
1671   c->nbs        = a->nbs;
1672 
1673   c->imax       = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax);
1674   c->ilen       = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen);
1675   for (i=0; i<mbs; i++) {
1676     c->imax[i] = a->imax[i];
1677     c->ilen[i] = a->ilen[i];
1678   }
1679 
1680   /* allocate the matrix space */
1681   c->singlemalloc = PETSC_TRUE;
1682   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1683   c->a = (MatScalar*)PetscMalloc(len);CHKPTRQ(c->a);
1684   c->j = (int*)(c->a + nz*bs2);
1685   c->i = c->j + nz;
1686   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1687   if (mbs > 0) {
1688     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1689     if (cpvalues == MAT_COPY_VALUES) {
1690       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1691     } else {
1692       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1693     }
1694   }
1695 
1696   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1697   c->sorted      = a->sorted;
1698   c->roworiented = a->roworiented;
1699   c->nonew       = a->nonew;
1700 
1701   if (a->diag) {
1702     c->diag = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->diag);
1703     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1704     for (i=0; i<mbs; i++) {
1705       c->diag[i] = a->diag[i];
1706     }
1707   } else c->diag        = 0;
1708   c->nz                 = a->nz;
1709   c->maxnz              = a->maxnz;
1710   c->solve_work         = 0;
1711   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1712   c->mult_work          = 0;
1713   *B = C;
1714   ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1715   PetscFunctionReturn(0);
1716 }
1717 
1718 #undef __FUNC__
1719 #define __FUNC__ "MatLoad_SeqBAIJ"
1720 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1721 {
1722   Mat_SeqBAIJ  *a;
1723   Mat          B;
1724   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1725   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1726   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1727   int          *masked,nmask,tmp,bs2,ishift;
1728   Scalar       *aa;
1729   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1730 
1731   PetscFunctionBegin;
1732   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1733   bs2  = bs*bs;
1734 
1735   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1736   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor");
1737   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1738   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1739   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object");
1740   M = header[1]; N = header[2]; nz = header[3];
1741 
1742   if (header[3] < 0) {
1743     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ");
1744   }
1745 
1746   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
1747 
1748   /*
1749      This code adds extra rows to make sure the number of rows is
1750     divisible by the blocksize
1751   */
1752   mbs        = M/bs;
1753   extra_rows = bs - M + bs*(mbs);
1754   if (extra_rows == bs) extra_rows = 0;
1755   else                  mbs++;
1756   if (extra_rows) {
1757     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1758   }
1759 
1760   /* read in row lengths */
1761   rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1762   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1763   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1764 
1765   /* read in column indices */
1766   jj = (int*)PetscMalloc((nz+extra_rows)*sizeof(int));CHKPTRQ(jj);
1767   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1768   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1769 
1770   /* loop over row lengths determining block row lengths */
1771   browlengths = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1772   ierr        = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1773   mask        = (int*)PetscMalloc(2*mbs*sizeof(int));CHKPTRQ(mask);
1774   ierr        = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1775   masked      = mask + mbs;
1776   rowcount    = 0; nzcount = 0;
1777   for (i=0; i<mbs; i++) {
1778     nmask = 0;
1779     for (j=0; j<bs; j++) {
1780       kmax = rowlengths[rowcount];
1781       for (k=0; k<kmax; k++) {
1782         tmp = jj[nzcount++]/bs;
1783         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1784       }
1785       rowcount++;
1786     }
1787     browlengths[i] += nmask;
1788     /* zero out the mask elements we set */
1789     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1790   }
1791 
1792   /* create our matrix */
1793   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1794   B = *A;
1795   a = (Mat_SeqBAIJ*)B->data;
1796 
1797   /* set matrix "i" values */
1798   a->i[0] = 0;
1799   for (i=1; i<= mbs; i++) {
1800     a->i[i]      = a->i[i-1] + browlengths[i-1];
1801     a->ilen[i-1] = browlengths[i-1];
1802   }
1803   a->nz         = 0;
1804   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1805 
1806   /* read in nonzero values */
1807   aa = (Scalar*)PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1808   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1809   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1810 
1811   /* set "a" and "j" values into matrix */
1812   nzcount = 0; jcount = 0;
1813   for (i=0; i<mbs; i++) {
1814     nzcountb = nzcount;
1815     nmask    = 0;
1816     for (j=0; j<bs; j++) {
1817       kmax = rowlengths[i*bs+j];
1818       for (k=0; k<kmax; k++) {
1819         tmp = jj[nzcount++]/bs;
1820 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1821       }
1822       rowcount++;
1823     }
1824     /* sort the masked values */
1825     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1826 
1827     /* set "j" values into matrix */
1828     maskcount = 1;
1829     for (j=0; j<nmask; j++) {
1830       a->j[jcount++]  = masked[j];
1831       mask[masked[j]] = maskcount++;
1832     }
1833     /* set "a" values into matrix */
1834     ishift = bs2*a->i[i];
1835     for (j=0; j<bs; j++) {
1836       kmax = rowlengths[i*bs+j];
1837       for (k=0; k<kmax; k++) {
1838         tmp       = jj[nzcountb]/bs ;
1839         block     = mask[tmp] - 1;
1840         point     = jj[nzcountb] - bs*tmp;
1841         idx       = ishift + bs2*block + j + bs*point;
1842         a->a[idx] = aa[nzcountb++];
1843       }
1844     }
1845     /* zero out the mask elements we set */
1846     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1847   }
1848   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix");
1849 
1850   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1851   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1852   ierr = PetscFree(aa);CHKERRQ(ierr);
1853   ierr = PetscFree(jj);CHKERRQ(ierr);
1854   ierr = PetscFree(mask);CHKERRQ(ierr);
1855 
1856   B->assembled = PETSC_TRUE;
1857 
1858   ierr = MatView_Private(B);CHKERRQ(ierr);
1859   PetscFunctionReturn(0);
1860 }
1861 
1862 
1863 
1864