xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 549d3d68a6ae470532d58d544870024f02ff2d7c)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: baij.c,v 1.173 1999/04/20 19:13:33 bsmith Exp balay $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "sys.h"
10 #include "src/mat/impls/baij/seq/baij.h"
11 #include "src/vec/vecimpl.h"
12 #include "src/inline/spops.h"
13 
14 #define CHUNKSIZE  10
15 
16 /*
17      Checks for missing diagonals
18 */
19 #undef __FUNC__
20 #define __FUNC__ "MatMissingDiag_SeqBAIJ"
21 int MatMissingDiag_SeqBAIJ(Mat A)
22 {
23   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
24   int         *diag = a->diag, *jj = a->j,i;
25 
26   PetscFunctionBegin;
27   for ( i=0; i<a->mbs; i++ ) {
28     if (jj[diag[i]] != i) {
29       SETERRQ1(1,1,"Matrix is missing diagonal number %d",i);
30     }
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 #undef __FUNC__
36 #define __FUNC__ "MatMarkDiag_SeqBAIJ"
37 int MatMarkDiag_SeqBAIJ(Mat A)
38 {
39   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
40   int         i,j, *diag, m = a->mbs;
41 
42   PetscFunctionBegin;
43   diag = (int *) PetscMalloc( (m+1)*sizeof(int));CHKPTRQ(diag);
44   PLogObjectMemory(A,(m+1)*sizeof(int));
45   for ( i=0; i<m; i++ ) {
46     diag[i] = a->i[i+1];
47     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
48       if (a->j[j] == i) {
49         diag[i] = j;
50         break;
51       }
52     }
53   }
54   a->diag = diag;
55   PetscFunctionReturn(0);
56 }
57 
58 
59 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
60 
61 #undef __FUNC__
62 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
63 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
64                             PetscTruth *done)
65 {
66   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
67   int         ierr, n = a->mbs,i;
68 
69   PetscFunctionBegin;
70   *nn = n;
71   if (!ia) PetscFunctionReturn(0);
72   if (symmetric) {
73     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
74   } else if (oshift == 1) {
75     /* temporarily add 1 to i and j indices */
76     int nz = a->i[n] + 1;
77     for ( i=0; i<nz; i++ ) a->j[i]++;
78     for ( i=0; i<n+1; i++ ) a->i[i]++;
79     *ia = a->i; *ja = a->j;
80   } else {
81     *ia = a->i; *ja = a->j;
82   }
83 
84   PetscFunctionReturn(0);
85 }
86 
87 #undef __FUNC__
88 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
89 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
90                                 PetscTruth *done)
91 {
92   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
93   int         i,n = a->mbs;
94 
95   PetscFunctionBegin;
96   if (!ia) PetscFunctionReturn(0);
97   if (symmetric) {
98     PetscFree(*ia);
99     PetscFree(*ja);
100   } else if (oshift == 1) {
101     int nz = a->i[n];
102     for ( i=0; i<nz; i++ ) a->j[i]--;
103     for ( i=0; i<n+1; i++ ) a->i[i]--;
104   }
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNC__
109 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
110 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
111 {
112   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
113 
114   PetscFunctionBegin;
115   *bs = baij->bs;
116   PetscFunctionReturn(0);
117 }
118 
119 
120 #undef __FUNC__
121 #define __FUNC__ "MatDestroy_SeqBAIJ"
122 int MatDestroy_SeqBAIJ(Mat A)
123 {
124   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
125   int         ierr;
126 
127   if (--A->refct > 0) PetscFunctionReturn(0);
128 
129   if (A->mapping) {
130     ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr);
131   }
132   if (A->bmapping) {
133     ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr);
134   }
135   if (A->rmap) {
136     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
137   }
138   if (A->cmap) {
139     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
140   }
141 #if defined(USE_PETSC_LOG)
142   PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
143 #endif
144   PetscFree(a->a);
145   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
146   if (a->diag) PetscFree(a->diag);
147   if (a->ilen) PetscFree(a->ilen);
148   if (a->imax) PetscFree(a->imax);
149   if (a->solve_work) PetscFree(a->solve_work);
150   if (a->mult_work) PetscFree(a->mult_work);
151   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
152   if (a->saved_values) PetscFree(a->saved_values);
153   PetscFree(a);
154   PLogObjectDestroy(A);
155   PetscHeaderDestroy(A);
156   PetscFunctionReturn(0);
157 }
158 
159 #undef __FUNC__
160 #define __FUNC__ "MatSetOption_SeqBAIJ"
161 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
162 {
163   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
164 
165   PetscFunctionBegin;
166   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
167   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
168   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
169   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
170   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
171   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew       = -1;
172   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew       = -2;
173   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
174   else if (op == MAT_ROWS_SORTED ||
175            op == MAT_ROWS_UNSORTED ||
176            op == MAT_SYMMETRIC ||
177            op == MAT_STRUCTURALLY_SYMMETRIC ||
178            op == MAT_YES_NEW_DIAGONALS ||
179            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
180            op == MAT_USE_HASH_TABLE) {
181     PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
182   } else if (op == MAT_NO_NEW_DIAGONALS) {
183     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
184   } else {
185     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
186   }
187   PetscFunctionReturn(0);
188 }
189 
190 
191 #undef __FUNC__
192 #define __FUNC__ "MatGetSize_SeqBAIJ"
193 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
194 {
195   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
196 
197   PetscFunctionBegin;
198   if (m) *m = a->m;
199   if (n) *n = a->n;
200   PetscFunctionReturn(0);
201 }
202 
203 #undef __FUNC__
204 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
205 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
206 {
207   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
208 
209   PetscFunctionBegin;
210   *m = 0; *n = a->m;
211   PetscFunctionReturn(0);
212 }
213 
214 #undef __FUNC__
215 #define __FUNC__ "MatGetRow_SeqBAIJ"
216 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
217 {
218   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ *) A->data;
219   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
220   MatScalar    *aa,*aa_i;
221   Scalar       *v_i;
222 
223   PetscFunctionBegin;
224   bs  = a->bs;
225   ai  = a->i;
226   aj  = a->j;
227   aa  = a->a;
228   bs2 = a->bs2;
229 
230   if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range");
231 
232   bn  = row/bs;   /* Block number */
233   bp  = row % bs; /* Block Position */
234   M   = ai[bn+1] - ai[bn];
235   *nz = bs*M;
236 
237   if (v) {
238     *v = 0;
239     if (*nz) {
240       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) );CHKPTRQ(*v);
241       for ( i=0; i<M; i++ ) { /* for each block in the block row */
242         v_i  = *v + i*bs;
243         aa_i = aa + bs2*(ai[bn] + i);
244         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
245       }
246     }
247   }
248 
249   if (idx) {
250     *idx = 0;
251     if (*nz) {
252       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) );CHKPTRQ(*idx);
253       for ( i=0; i<M; i++ ) { /* for each block in the block row */
254         idx_i = *idx + i*bs;
255         itmp  = bs*aj[ai[bn] + i];
256         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
257       }
258     }
259   }
260   PetscFunctionReturn(0);
261 }
262 
263 #undef __FUNC__
264 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
265 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
266 {
267   PetscFunctionBegin;
268   if (idx) {if (*idx) PetscFree(*idx);}
269   if (v)   {if (*v)   PetscFree(*v);}
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNC__
274 #define __FUNC__ "MatTranspose_SeqBAIJ"
275 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
276 {
277   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
278   Mat         C;
279   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
280   int         *rows,*cols,bs2=a->bs2;
281   MatScalar   *array = a->a;
282 
283   PetscFunctionBegin;
284   if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place");
285   col  = (int *) PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col);
286   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
287 
288   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
289   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
290   PetscFree(col);
291   rows = (int *) PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows);
292   cols = rows + bs;
293   for ( i=0; i<mbs; i++ ) {
294     cols[0] = i*bs;
295     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
296     len = ai[i+1] - ai[i];
297     for ( j=0; j<len; j++ ) {
298       rows[0] = (*aj++)*bs;
299       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
300       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
301       array += bs2;
302     }
303   }
304   PetscFree(rows);
305 
306   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
307   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
308 
309   if (B != PETSC_NULL) {
310     *B = C;
311   } else {
312     PetscOps *Abops;
313     MatOps   Aops;
314 
315     /* This isn't really an in-place transpose */
316     PetscFree(a->a);
317     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
318     if (a->diag) PetscFree(a->diag);
319     if (a->ilen) PetscFree(a->ilen);
320     if (a->imax) PetscFree(a->imax);
321     if (a->solve_work) PetscFree(a->solve_work);
322     PetscFree(a);
323 
324 
325     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
326     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
327 
328     /*
329        This is horrible, horrible code. We need to keep the
330       A pointers for the bops and ops but copy everything
331       else from C.
332     */
333     Abops    = A->bops;
334     Aops     = A->ops;
335     ierr     = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr);
336     A->bops  = Abops;
337     A->ops   = Aops;
338     A->qlist = 0;
339 
340     PetscHeaderDestroy(C);
341   }
342   PetscFunctionReturn(0);
343 }
344 
345 #undef __FUNC__
346 #define __FUNC__ "MatView_SeqBAIJ_Binary"
347 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
348 {
349   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
350   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
351   Scalar      *aa;
352   FILE        *file;
353 
354   PetscFunctionBegin;
355   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
356   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
357   col_lens[0] = MAT_COOKIE;
358 
359   col_lens[1] = a->m;
360   col_lens[2] = a->n;
361   col_lens[3] = a->nz*bs2;
362 
363   /* store lengths of each row and write (including header) to file */
364   count = 0;
365   for ( i=0; i<a->mbs; i++ ) {
366     for ( j=0; j<bs; j++ ) {
367       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
368     }
369   }
370   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr);
371   PetscFree(col_lens);
372 
373   /* store column indices (zero start index) */
374   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) );CHKPTRQ(jj);
375   count = 0;
376   for ( i=0; i<a->mbs; i++ ) {
377     for ( j=0; j<bs; j++ ) {
378       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
379         for ( l=0; l<bs; l++ ) {
380           jj[count++] = bs*a->j[k] + l;
381         }
382       }
383     }
384   }
385   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
386   PetscFree(jj);
387 
388   /* store nonzero values */
389   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa);
390   count = 0;
391   for ( i=0; i<a->mbs; i++ ) {
392     for ( j=0; j<bs; j++ ) {
393       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
394         for ( l=0; l<bs; l++ ) {
395           aa[count++] = a->a[bs2*k + l*bs + j];
396         }
397       }
398     }
399   }
400   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
401   PetscFree(aa);
402 
403   ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
404   if (file) {
405     fprintf(file,"-matload_block_size %d\n",a->bs);
406   }
407   PetscFunctionReturn(0);
408 }
409 
410 #undef __FUNC__
411 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
412 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
413 {
414   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
415   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
416   FILE        *fd;
417   char        *outputname;
418 
419   PetscFunctionBegin;
420   ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
421   ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr);
422   ierr = ViewerGetFormat(viewer,&format);
423   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
424     ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
425   } else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
426     SETERRQ(PETSC_ERR_SUP,0,"Socket format not supported");
427   } else if (format == VIEWER_FORMAT_ASCII_COMMON) {
428     for ( i=0; i<a->mbs; i++ ) {
429       for ( j=0; j<bs; j++ ) {
430         fprintf(fd,"row %d:",i*bs+j);
431         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
432           for ( l=0; l<bs; l++ ) {
433 #if defined(USE_PETSC_COMPLEX)
434             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
435               fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
436                       PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));
437             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
438               fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
439                       PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));
440             } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
441               fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));
442             }
443 #else
444             if (a->a[bs2*k + l*bs + j] != 0.0) {
445               fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
446             }
447 #endif
448           }
449         }
450         fprintf(fd,"\n");
451       }
452     }
453   } else {
454     for ( i=0; i<a->mbs; i++ ) {
455       for ( j=0; j<bs; j++ ) {
456         fprintf(fd,"row %d:",i*bs+j);
457         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
458           for ( l=0; l<bs; l++ ) {
459 #if defined(USE_PETSC_COMPLEX)
460             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) {
461               fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
462                 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));
463             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) {
464               fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
465                 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));
466             } else {
467               fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));
468             }
469 #else
470             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
471 #endif
472           }
473         }
474         fprintf(fd,"\n");
475       }
476     }
477   }
478   fflush(fd);
479   PetscFunctionReturn(0);
480 }
481 
482 #undef __FUNC__
483 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom"
484 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa)
485 {
486   Mat          A = (Mat) Aa;
487   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
488   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
489   double       xl,yl,xr,yr,x_l,x_r,y_l,y_r;
490   MatScalar    *aa;
491   MPI_Comm     comm;
492   Viewer       viewer;
493 
494   PetscFunctionBegin;
495   /*
496       This is nasty. If this is called from an originally parallel matrix
497    then all processes call this, but only the first has the matrix so the
498    rest should return immediately.
499   */
500   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
501   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
502   if (rank) PetscFunctionReturn(0);
503 
504   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr);
505 
506   ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
507 
508   /* loop over matrix elements drawing boxes */
509   color = DRAW_BLUE;
510   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
511     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
512       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
513       x_l = a->j[j]*bs; x_r = x_l + 1.0;
514       aa = a->a + j*bs2;
515       for ( k=0; k<bs; k++ ) {
516         for ( l=0; l<bs; l++ ) {
517           if (PetscReal(*aa++) >=  0.) continue;
518           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
519         }
520       }
521     }
522   }
523   color = DRAW_CYAN;
524   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
525     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
526       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
527       x_l = a->j[j]*bs; x_r = x_l + 1.0;
528       aa = a->a + j*bs2;
529       for ( k=0; k<bs; k++ ) {
530         for ( l=0; l<bs; l++ ) {
531           if (PetscReal(*aa++) != 0.) continue;
532           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
533         }
534       }
535     }
536   }
537 
538   color = DRAW_RED;
539   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
540     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
541       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
542       x_l = a->j[j]*bs; x_r = x_l + 1.0;
543       aa = a->a + j*bs2;
544       for ( k=0; k<bs; k++ ) {
545         for ( l=0; l<bs; l++ ) {
546           if (PetscReal(*aa++) <= 0.) continue;
547           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
548         }
549       }
550     }
551   }
552   PetscFunctionReturn(0);
553 }
554 
555 #undef __FUNC__
556 #define __FUNC__ "MatView_SeqBAIJ_Draw"
557 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
558 {
559   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
560   int          ierr;
561   double       xl,yl,xr,yr,w,h;
562   Draw         draw;
563   PetscTruth   isnull;
564 
565   PetscFunctionBegin;
566 
567   ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
568   ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
569 
570   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
571   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
572   xr += w;    yr += h;  xl = -w;     yl = -h;
573   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
574   ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
575   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
576   PetscFunctionReturn(0);
577 }
578 
579 #undef __FUNC__
580 #define __FUNC__ "MatView_SeqBAIJ"
581 int MatView_SeqBAIJ(Mat A,Viewer viewer)
582 {
583   ViewerType  vtype;
584   int         ierr;
585 
586   PetscFunctionBegin;
587   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
588   if (PetscTypeCompare(vtype,SOCKET_VIEWER)) {
589     SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported");
590   } else if (PetscTypeCompare(vtype,ASCII_VIEWER)){
591     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
592   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
593     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
594   } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
595     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
596   } else {
597     SETERRQ(1,1,"Viewer type not supported by PETSc object");
598   }
599   PetscFunctionReturn(0);
600 }
601 
602 
603 #undef __FUNC__
604 #define __FUNC__ "MatGetValues_SeqBAIJ"
605 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
606 {
607   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
608   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
609   int        *ai = a->i, *ailen = a->ilen;
610   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
611   MatScalar  *ap, *aa = a->a, zero = 0.0;
612 
613   PetscFunctionBegin;
614   for ( k=0; k<m; k++ ) { /* loop over rows */
615     row  = im[k]; brow = row/bs;
616     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
617     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
618     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
619     nrow = ailen[brow];
620     for ( l=0; l<n; l++ ) { /* loop over columns */
621       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
622       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
623       col  = in[l] ;
624       bcol = col/bs;
625       cidx = col%bs;
626       ridx = row%bs;
627       high = nrow;
628       low  = 0; /* assume unsorted */
629       while (high-low > 5) {
630         t = (low+high)/2;
631         if (rp[t] > bcol) high = t;
632         else             low  = t;
633       }
634       for ( i=low; i<high; i++ ) {
635         if (rp[i] > bcol) break;
636         if (rp[i] == bcol) {
637           *v++ = ap[bs2*i+bs*cidx+ridx];
638           goto finished;
639         }
640       }
641       *v++ = zero;
642       finished:;
643     }
644   }
645   PetscFunctionReturn(0);
646 }
647 
648 
649 #undef __FUNC__
650 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
651 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
652 {
653   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
654   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
655   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented;
656   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
657   Scalar      *value = v;
658   MatScalar   *ap,*aa=a->a,*bap;
659 
660   PetscFunctionBegin;
661   if (roworiented) {
662     stepval = (n-1)*bs;
663   } else {
664     stepval = (m-1)*bs;
665   }
666   for ( k=0; k<m; k++ ) { /* loop over added rows */
667     row  = im[k];
668     if (row < 0) continue;
669 #if defined(USE_PETSC_BOPT_g)
670     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
671 #endif
672     rp   = aj + ai[row];
673     ap   = aa + bs2*ai[row];
674     rmax = imax[row];
675     nrow = ailen[row];
676     low  = 0;
677     for ( l=0; l<n; l++ ) { /* loop over added columns */
678       if (in[l] < 0) continue;
679 #if defined(USE_PETSC_BOPT_g)
680       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
681 #endif
682       col = in[l];
683       if (roworiented) {
684         value = v + k*(stepval+bs)*bs + l*bs;
685       } else {
686         value = v + l*(stepval+bs)*bs + k*bs;
687       }
688       if (!sorted) low = 0; high = nrow;
689       while (high-low > 7) {
690         t = (low+high)/2;
691         if (rp[t] > col) high = t;
692         else             low  = t;
693       }
694       for ( i=low; i<high; i++ ) {
695         if (rp[i] > col) break;
696         if (rp[i] == col) {
697           bap  = ap +  bs2*i;
698           if (roworiented) {
699             if (is == ADD_VALUES) {
700               for ( ii=0; ii<bs; ii++,value+=stepval ) {
701                 for (jj=ii; jj<bs2; jj+=bs ) {
702                   bap[jj] += *value++;
703                 }
704               }
705             } else {
706               for ( ii=0; ii<bs; ii++,value+=stepval ) {
707                 for (jj=ii; jj<bs2; jj+=bs ) {
708                   bap[jj] = *value++;
709                 }
710               }
711             }
712           } else {
713             if (is == ADD_VALUES) {
714               for ( ii=0; ii<bs; ii++,value+=stepval ) {
715                 for (jj=0; jj<bs; jj++ ) {
716                   *bap++ += *value++;
717                 }
718               }
719             } else {
720               for ( ii=0; ii<bs; ii++,value+=stepval ) {
721                 for (jj=0; jj<bs; jj++ ) {
722                   *bap++  = *value++;
723                 }
724               }
725             }
726           }
727           goto noinsert2;
728         }
729       }
730       if (nonew == 1) goto noinsert2;
731       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
732       if (nrow >= rmax) {
733         /* there is no extra room in row, therefore enlarge */
734         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
735         MatScalar *new_a;
736 
737         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
738 
739         /* malloc new storage space */
740         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
741         new_a   = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a);
742         new_j   = (int *) (new_a + bs2*new_nz);
743         new_i   = new_j + new_nz;
744 
745         /* copy over old data into new slots */
746         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
747         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
748         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
749         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
750         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
751         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
752         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
753         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
754         /* free up old matrix storage */
755         PetscFree(a->a);
756         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
757         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
758         a->singlemalloc = 1;
759 
760         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
761         rmax = imax[row] = imax[row] + CHUNKSIZE;
762         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
763         a->maxnz += bs2*CHUNKSIZE;
764         a->reallocs++;
765         a->nz++;
766       }
767       N = nrow++ - 1;
768       /* shift up all the later entries in this row */
769       for ( ii=N; ii>=i; ii-- ) {
770         rp[ii+1] = rp[ii];
771         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
772       }
773       if (N >= i) {
774         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
775       }
776       rp[i] = col;
777       bap   = ap +  bs2*i;
778       if (roworiented) {
779         for ( ii=0; ii<bs; ii++,value+=stepval) {
780           for (jj=ii; jj<bs2; jj+=bs ) {
781             bap[jj] = *value++;
782           }
783         }
784       } else {
785         for ( ii=0; ii<bs; ii++,value+=stepval ) {
786           for (jj=0; jj<bs; jj++ ) {
787             *bap++  = *value++;
788           }
789         }
790       }
791       noinsert2:;
792       low = i;
793     }
794     ailen[row] = nrow;
795   }
796   PetscFunctionReturn(0);
797 }
798 
799 
800 #undef __FUNC__
801 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
802 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
803 {
804   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
805   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
806   int        m = a->m,*ip, N, *ailen = a->ilen;
807   int        mbs = a->mbs, bs2 = a->bs2,rmax = 0,ierr;
808   MatScalar  *aa = a->a, *ap;
809 
810   PetscFunctionBegin;
811   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
812 
813   if (m) rmax = ailen[0];
814   for ( i=1; i<mbs; i++ ) {
815     /* move each row back by the amount of empty slots (fshift) before it*/
816     fshift += imax[i-1] - ailen[i-1];
817     rmax   = PetscMax(rmax,ailen[i]);
818     if (fshift) {
819       ip = aj + ai[i]; ap = aa + bs2*ai[i];
820       N = ailen[i];
821       for ( j=0; j<N; j++ ) {
822         ip[j-fshift] = ip[j];
823         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
824       }
825     }
826     ai[i] = ai[i-1] + ailen[i-1];
827   }
828   if (mbs) {
829     fshift += imax[mbs-1] - ailen[mbs-1];
830     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
831   }
832   /* reset ilen and imax for each row */
833   for ( i=0; i<mbs; i++ ) {
834     ailen[i] = imax[i] = ai[i+1] - ai[i];
835   }
836   a->nz = ai[mbs];
837 
838   /* diagonals may have moved, so kill the diagonal pointers */
839   if (fshift && a->diag) {
840     PetscFree(a->diag);
841     PLogObjectMemory(A,-(m+1)*sizeof(int));
842     a->diag = 0;
843   }
844   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
845            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
846   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
847            a->reallocs);
848   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
849   a->reallocs          = 0;
850   A->info.nz_unneeded  = (double)fshift*bs2;
851 
852   PetscFunctionReturn(0);
853 }
854 
855 
856 
857 /*
858    This function returns an array of flags which indicate the locations of contiguous
859    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
860    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
861    Assume: sizes should be long enough to hold all the values.
862 */
863 #undef __FUNC__
864 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks"
865 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
866 {
867   int i,j,k,row;
868   PetscTruth flg;
869 
870   /*   PetscFunctionBegin;*/
871   for ( i=0,j=0; i<n; j++ ) {
872     row = idx[i];
873     if (row%bs!=0) { /* Not the begining of a block */
874       sizes[j] = 1;
875       i++;
876     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
877       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
878       i++;
879     } else { /* Begining of the block, so check if the complete block exists */
880       flg = PETSC_TRUE;
881       for ( k=1; k<bs; k++ ) {
882         if (row+k != idx[i+k]) { /* break in the block */
883           flg = PETSC_FALSE;
884           break;
885         }
886       }
887       if (flg == PETSC_TRUE) { /* No break in the bs */
888         sizes[j] = bs;
889         i+= bs;
890       } else {
891         sizes[j] = 1;
892         i++;
893       }
894     }
895   }
896   *bs_max = j;
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNC__
901 #define __FUNC__ "MatZeroRows_SeqBAIJ"
902 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
903 {
904   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
905   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
906   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
907   Scalar      zero = 0.0;
908   MatScalar   *aa;
909 
910   PetscFunctionBegin;
911   /* Make a copy of the IS and  sort it */
912   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
913   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
914 
915   /* allocate memory for rows,sizes */
916   rows  = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows);
917   sizes = rows + is_n;
918 
919   /* initialize copy IS valurs to rows, and sort them */
920   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
921   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
922   ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
923   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
924 
925   for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) {
926     row   = rows[j];
927     if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row);
928     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
929     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
930     if (sizes[i] == bs) {
931       if (diag) {
932         if (baij->ilen[row/bs] > 0) {
933           baij->ilen[row/bs] = 1;
934           baij->j[baij->i[row/bs]] = row/bs;
935           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
936         }
937         /* Now insert all the diagoanl values for this bs */
938         for ( k=0; k<bs; k++ ) {
939           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
940         }
941       } else { /* (!diag) */
942         baij->ilen[row/bs] = 0;
943       } /* end (!diag) */
944     } else { /* (sizes[i] != bs) */
945 #if defined (USE_PETSC_DEBUG)
946       if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1");
947 #endif
948       for ( k=0; k<count; k++ ) {
949         aa[0] = zero;
950         aa+=bs;
951       }
952       if (diag) {
953         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
954       }
955     }
956   }
957 
958   PetscFree(rows);
959   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
960   PetscFunctionReturn(0);
961 }
962 
963 #undef __FUNC__
964 #define __FUNC__ "MatSetValues_SeqBAIJ"
965 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
966 {
967   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
968   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
969   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
970   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
971   int         ridx,cidx,bs2=a->bs2,ierr;
972   MatScalar   *ap,value,*aa=a->a,*bap;
973 
974   PetscFunctionBegin;
975   for ( k=0; k<m; k++ ) { /* loop over added rows */
976     row  = im[k]; brow = row/bs;
977     if (row < 0) continue;
978 #if defined(USE_PETSC_BOPT_g)
979     if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m);
980 #endif
981     rp   = aj + ai[brow];
982     ap   = aa + bs2*ai[brow];
983     rmax = imax[brow];
984     nrow = ailen[brow];
985     low  = 0;
986     for ( l=0; l<n; l++ ) { /* loop over added columns */
987       if (in[l] < 0) continue;
988 #if defined(USE_PETSC_BOPT_g)
989       if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n);
990 #endif
991       col = in[l]; bcol = col/bs;
992       ridx = row % bs; cidx = col % bs;
993       if (roworiented) {
994         value = v[l + k*n];
995       } else {
996         value = v[k + l*m];
997       }
998       if (!sorted) low = 0; high = nrow;
999       while (high-low > 7) {
1000         t = (low+high)/2;
1001         if (rp[t] > bcol) high = t;
1002         else              low  = t;
1003       }
1004       for ( i=low; i<high; i++ ) {
1005         if (rp[i] > bcol) break;
1006         if (rp[i] == bcol) {
1007           bap  = ap +  bs2*i + bs*cidx + ridx;
1008           if (is == ADD_VALUES) *bap += value;
1009           else                  *bap  = value;
1010           goto noinsert1;
1011         }
1012       }
1013       if (nonew == 1) goto noinsert1;
1014       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1015       if (nrow >= rmax) {
1016         /* there is no extra room in row, therefore enlarge */
1017         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1018         MatScalar *new_a;
1019 
1020         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1021 
1022         /* Malloc new storage space */
1023         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1024         new_a   = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a);
1025         new_j   = (int *) (new_a + bs2*new_nz);
1026         new_i   = new_j + new_nz;
1027 
1028         /* copy over old data into new slots */
1029         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
1030         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1031         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1032         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1033         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1034         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1035         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1036         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1037         /* free up old matrix storage */
1038         PetscFree(a->a);
1039         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
1040         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1041         a->singlemalloc = 1;
1042 
1043         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1044         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1045         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1046         a->maxnz += bs2*CHUNKSIZE;
1047         a->reallocs++;
1048         a->nz++;
1049       }
1050       N = nrow++ - 1;
1051       /* shift up all the later entries in this row */
1052       for ( ii=N; ii>=i; ii-- ) {
1053         rp[ii+1] = rp[ii];
1054         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1055       }
1056       if (N>=i) {
1057         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1058       }
1059       rp[i]                      = bcol;
1060       ap[bs2*i + bs*cidx + ridx] = value;
1061       noinsert1:;
1062       low = i;
1063     }
1064     ailen[brow] = nrow;
1065   }
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1070 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1071 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1072 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1073 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1074 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec);
1075 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1076 extern int MatScale_SeqBAIJ(Scalar*,Mat);
1077 extern int MatNorm_SeqBAIJ(Mat,NormType,double *);
1078 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*);
1079 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1080 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1081 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1082 extern int MatZeroEntries_SeqBAIJ(Mat);
1083 
1084 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1085 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1086 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1087 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1088 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1089 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1090 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
1091 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1092 
1093 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1094 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1095 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1096 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1097 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1098 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1099 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*);
1100 
1101 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1102 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1103 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1104 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1105 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1106 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec);
1107 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1108 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1109 
1110 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1111 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1112 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1113 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1114 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1115 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
1116 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1117 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1118 
1119 #undef __FUNC__
1120 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1121 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1122 {
1123   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1124   Mat         outA;
1125   int         ierr;
1126   PetscTruth  row_identity, col_identity;
1127 
1128   PetscFunctionBegin;
1129   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU");
1130   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1131   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1132   if (!row_identity || !col_identity) {
1133     SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU");
1134   }
1135 
1136   outA          = inA;
1137   inA->factor   = FACTOR_LU;
1138   a->row        = row;
1139   a->col        = col;
1140 
1141   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1142   ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr);
1143   PLogObjectParent(inA,a->icol);
1144 
1145   if (!a->solve_work) {
1146     a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1147     PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1148   }
1149 
1150   if (!a->diag) {
1151     ierr = MatMarkDiag_SeqBAIJ(inA);CHKERRQ(ierr);
1152   }
1153   /*
1154       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1155       for ILU(0) factorization with natural ordering
1156   */
1157   switch (a->bs) {
1158     case 2:
1159     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1160     inA->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1161     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1162     break;
1163   case 3:
1164     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1165     inA->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1166     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1167     break;
1168   case 4:
1169     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1170     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1171     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1172     break;
1173   case 5:
1174     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1175     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1176     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1177     break;
1178   case 6:
1179     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1180     inA->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1181     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1182     break;
1183   case 7:
1184     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1185     inA->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1186     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1187     break;
1188   }
1189 
1190   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1191 
1192   PetscFunctionReturn(0);
1193 }
1194 #undef __FUNC__
1195 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1196 int MatPrintHelp_SeqBAIJ(Mat A)
1197 {
1198   static int called = 0;
1199   MPI_Comm   comm = A->comm;
1200   int        ierr;
1201 
1202   PetscFunctionBegin;
1203   if (called) {PetscFunctionReturn(0);} else called = 1;
1204   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1205   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 EXTERN_C_BEGIN
1210 #undef __FUNC__
1211 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1212 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1213 {
1214   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1215   int         i,nz,n;
1216 
1217   PetscFunctionBegin;
1218   nz = baij->maxnz;
1219   n  = baij->n;
1220   for (i=0; i<nz; i++) {
1221     baij->j[i] = indices[i];
1222   }
1223   baij->nz = nz;
1224   for ( i=0; i<n; i++ ) {
1225     baij->ilen[i] = baij->imax[i];
1226   }
1227 
1228   PetscFunctionReturn(0);
1229 }
1230 EXTERN_C_END
1231 
1232 #undef __FUNC__
1233 #define __FUNC__ "MatSeqBAIJSetColumnIndices"
1234 /*@
1235     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1236        in the matrix.
1237 
1238   Input Parameters:
1239 +  mat - the SeqBAIJ matrix
1240 -  indices - the column indices
1241 
1242   Level: advanced
1243 
1244   Notes:
1245     This can be called if you have precomputed the nonzero structure of the
1246   matrix and want to provide it to the matrix object to improve the performance
1247   of the MatSetValues() operation.
1248 
1249     You MUST have set the correct numbers of nonzeros per row in the call to
1250   MatCreateSeqBAIJ().
1251 
1252     MUST be called before any calls to MatSetValues();
1253 
1254 @*/
1255 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1256 {
1257   int ierr,(*f)(Mat,int *);
1258 
1259   PetscFunctionBegin;
1260   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1261   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr);
1262   if (f) {
1263     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1264   } else {
1265     SETERRQ(1,1,"Wrong type of matrix to set column indices");
1266   }
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 /* -------------------------------------------------------------------*/
1271 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1272        MatGetRow_SeqBAIJ,
1273        MatRestoreRow_SeqBAIJ,
1274        MatMult_SeqBAIJ_N,
1275        MatMultAdd_SeqBAIJ_N,
1276        MatMultTrans_SeqBAIJ,
1277        MatMultTransAdd_SeqBAIJ,
1278        MatSolve_SeqBAIJ_N,
1279        0,
1280        0,
1281        0,
1282        MatLUFactor_SeqBAIJ,
1283        0,
1284        0,
1285        MatTranspose_SeqBAIJ,
1286        MatGetInfo_SeqBAIJ,
1287        MatEqual_SeqBAIJ,
1288        MatGetDiagonal_SeqBAIJ,
1289        MatDiagonalScale_SeqBAIJ,
1290        MatNorm_SeqBAIJ,
1291        0,
1292        MatAssemblyEnd_SeqBAIJ,
1293        0,
1294        MatSetOption_SeqBAIJ,
1295        MatZeroEntries_SeqBAIJ,
1296        MatZeroRows_SeqBAIJ,
1297        MatLUFactorSymbolic_SeqBAIJ,
1298        MatLUFactorNumeric_SeqBAIJ_N,
1299        0,
1300        0,
1301        MatGetSize_SeqBAIJ,
1302        MatGetSize_SeqBAIJ,
1303        MatGetOwnershipRange_SeqBAIJ,
1304        MatILUFactorSymbolic_SeqBAIJ,
1305        0,
1306        0,
1307        0,
1308        MatDuplicate_SeqBAIJ,
1309        0,
1310        0,
1311        MatILUFactor_SeqBAIJ,
1312        0,
1313        0,
1314        MatGetSubMatrices_SeqBAIJ,
1315        MatIncreaseOverlap_SeqBAIJ,
1316        MatGetValues_SeqBAIJ,
1317        0,
1318        MatPrintHelp_SeqBAIJ,
1319        MatScale_SeqBAIJ,
1320        0,
1321        0,
1322        0,
1323        MatGetBlockSize_SeqBAIJ,
1324        MatGetRowIJ_SeqBAIJ,
1325        MatRestoreRowIJ_SeqBAIJ,
1326        0,
1327        0,
1328        0,
1329        0,
1330        0,
1331        0,
1332        MatSetValuesBlocked_SeqBAIJ,
1333        MatGetSubMatrix_SeqBAIJ,
1334        0,
1335        0,
1336        MatGetMaps_Petsc};
1337 
1338 EXTERN_C_BEGIN
1339 #undef __FUNC__
1340 #define __FUNC__ "MatStoreValues_SeqBAIJ"
1341 int MatStoreValues_SeqBAIJ(Mat mat)
1342 {
1343   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1344   int         nz = aij->i[aij->m]*aij->bs*aij->bs2;
1345   int         ierr;
1346 
1347   PetscFunctionBegin;
1348   if (aij->nonew != 1) {
1349     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1350   }
1351 
1352   /* allocate space for values if not already there */
1353   if (!aij->saved_values) {
1354     aij->saved_values = (Scalar *) PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values);
1355   }
1356 
1357   /* copy values over */
1358   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1359   PetscFunctionReturn(0);
1360 }
1361 EXTERN_C_END
1362 
1363 EXTERN_C_BEGIN
1364 #undef __FUNC__
1365 #define __FUNC__ "MatRetrieveValues_SeqBAIJ"
1366 int MatRetrieveValues_SeqBAIJ(Mat mat)
1367 {
1368   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1369   int         nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr;
1370 
1371   PetscFunctionBegin;
1372   if (aij->nonew != 1) {
1373     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1374   }
1375   if (!aij->saved_values) {
1376     SETERRQ(1,1,"Must call MatStoreValues(A);first");
1377   }
1378 
1379   /* copy values over */
1380   ierr = PetscMemcpy(aij->a, aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1381   PetscFunctionReturn(0);
1382 }
1383 EXTERN_C_END
1384 
1385 #undef __FUNC__
1386 #define __FUNC__ "MatCreateSeqBAIJ"
1387 /*@C
1388    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1389    compressed row) format.  For good matrix assembly performance the
1390    user should preallocate the matrix storage by setting the parameter nz
1391    (or the array nnz).  By setting these parameters accurately, performance
1392    during matrix assembly can be increased by more than a factor of 50.
1393 
1394    Collective on MPI_Comm
1395 
1396    Input Parameters:
1397 +  comm - MPI communicator, set to PETSC_COMM_SELF
1398 .  bs - size of block
1399 .  m - number of rows
1400 .  n - number of columns
1401 .  nz - number of block nonzeros per block row (same for all rows)
1402 -  nnz - array containing the number of block nonzeros in the various block rows
1403          (possibly different for each block row) or PETSC_NULL
1404 
1405    Output Parameter:
1406 .  A - the matrix
1407 
1408    Options Database Keys:
1409 .   -mat_no_unroll - uses code that does not unroll the loops in the
1410                      block calculations (much slower)
1411 .    -mat_block_size - size of the blocks to use
1412 
1413    Level: intermediate
1414 
1415    Notes:
1416    The block AIJ format is fully compatible with standard Fortran 77
1417    storage.  That is, the stored row and column indices can begin at
1418    either one (as in Fortran) or zero.  See the users' manual for details.
1419 
1420    Specify the preallocated storage with either nz or nnz (not both).
1421    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1422    allocation.  For additional details, see the users manual chapter on
1423    matrices.
1424 
1425 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1426 @*/
1427 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1428 {
1429   Mat         B;
1430   Mat_SeqBAIJ *b;
1431   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
1432 
1433   PetscFunctionBegin;
1434   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1435   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1");
1436 
1437   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1438 
1439   if (mbs*bs!=m || nbs*bs!=n) {
1440     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize");
1441   }
1442 
1443   *A = 0;
1444   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView);
1445   PLogObjectCreate(B);
1446   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b);
1447   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1448   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1449   ierr    = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1450   if (!flg) {
1451     switch (bs) {
1452     case 1:
1453       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1454       B->ops->solve           = MatSolve_SeqBAIJ_1;
1455       B->ops->mult            = MatMult_SeqBAIJ_1;
1456       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1457       break;
1458     case 2:
1459       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1460       B->ops->solve           = MatSolve_SeqBAIJ_2;
1461       B->ops->mult            = MatMult_SeqBAIJ_2;
1462       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1463       break;
1464     case 3:
1465       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1466       B->ops->solve           = MatSolve_SeqBAIJ_3;
1467       B->ops->mult            = MatMult_SeqBAIJ_3;
1468       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1469       break;
1470     case 4:
1471       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1472       B->ops->solve           = MatSolve_SeqBAIJ_4;
1473       B->ops->mult            = MatMult_SeqBAIJ_4;
1474       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1475       break;
1476     case 5:
1477       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1478       B->ops->solve           = MatSolve_SeqBAIJ_5;
1479       B->ops->mult            = MatMult_SeqBAIJ_5;
1480       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1481       break;
1482     case 6:
1483       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1484       B->ops->solve           = MatSolve_SeqBAIJ_6;
1485       B->ops->mult            = MatMult_SeqBAIJ_6;
1486       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1487       break;
1488     case 7:
1489       B->ops->mult            = MatMult_SeqBAIJ_7;
1490       B->ops->solve           = MatSolve_SeqBAIJ_7;
1491       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1492       break;
1493     }
1494   }
1495   B->ops->destroy     = MatDestroy_SeqBAIJ;
1496   B->ops->view        = MatView_SeqBAIJ;
1497   B->factor           = 0;
1498   B->lupivotthreshold = 1.0;
1499   B->mapping          = 0;
1500   b->row              = 0;
1501   b->col              = 0;
1502   b->icol             = 0;
1503   b->reallocs         = 0;
1504   b->saved_values     = 0;
1505 
1506   b->m       = m; B->m = m; B->M = m;
1507   b->n       = n; B->n = n; B->N = n;
1508 
1509   ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr);
1510   ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr);
1511 
1512   b->mbs     = mbs;
1513   b->nbs     = nbs;
1514   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(b->imax);
1515   if (nnz == PETSC_NULL) {
1516     if (nz == PETSC_DEFAULT) nz = 5;
1517     else if (nz <= 0)        nz = 1;
1518     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1519     nz = nz*mbs;
1520   } else {
1521     nz = 0;
1522     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1523   }
1524 
1525   /* allocate the matrix space */
1526   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int);
1527   b->a  = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a);
1528   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1529   b->j  = (int *) (b->a + nz*bs2);
1530   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1531   b->i  = b->j + nz;
1532   b->singlemalloc = 1;
1533 
1534   b->i[0] = 0;
1535   for (i=1; i<mbs+1; i++) {
1536     b->i[i] = b->i[i-1] + b->imax[i-1];
1537   }
1538 
1539   /* b->ilen will count nonzeros in each block row so far. */
1540   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1541   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1542   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1543 
1544   b->bs               = bs;
1545   b->bs2              = bs2;
1546   b->mbs              = mbs;
1547   b->nz               = 0;
1548   b->maxnz            = nz*bs2;
1549   b->sorted           = 0;
1550   b->roworiented      = 1;
1551   b->nonew            = 0;
1552   b->diag             = 0;
1553   b->solve_work       = 0;
1554   b->mult_work        = 0;
1555   b->spptr            = 0;
1556   B->info.nz_unneeded = (double)b->maxnz;
1557 
1558   *A = B;
1559   ierr = OptionsHasName(PETSC_NULL,"-help", &flg);CHKERRQ(ierr);
1560   if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); }
1561 
1562   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
1563                                      "MatStoreValues_SeqBAIJ",
1564                                      (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1565   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
1566                                      "MatRetrieveValues_SeqBAIJ",
1567                                      (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1568   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1569                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1570                                      (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 #undef __FUNC__
1575 #define __FUNC__ "MatDuplicate_SeqBAIJ"
1576 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1577 {
1578   Mat         C;
1579   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1580   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1581 
1582   PetscFunctionBegin;
1583   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix");
1584 
1585   *B = 0;
1586   PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView);
1587   PLogObjectCreate(C);
1588   C->data         = (void *) (c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c);
1589   ierr            = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1590   C->ops->destroy = MatDestroy_SeqBAIJ;
1591   C->ops->view    = MatView_SeqBAIJ;
1592   C->factor       = A->factor;
1593   c->row          = 0;
1594   c->col          = 0;
1595   c->icol         = 0;
1596   c->saved_values = 0;
1597   C->assembled    = PETSC_TRUE;
1598 
1599   c->m = C->m   = a->m;
1600   c->n = C->n   = a->n;
1601   C->M          = a->m;
1602   C->N          = a->n;
1603 
1604   c->bs         = a->bs;
1605   c->bs2        = a->bs2;
1606   c->mbs        = a->mbs;
1607   c->nbs        = a->nbs;
1608 
1609   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax);
1610   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen);
1611   for ( i=0; i<mbs; i++ ) {
1612     c->imax[i] = a->imax[i];
1613     c->ilen[i] = a->ilen[i];
1614   }
1615 
1616   /* allocate the matrix space */
1617   c->singlemalloc = 1;
1618   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1619   c->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(c->a);
1620   c->j = (int *) (c->a + nz*bs2);
1621   c->i = c->j + nz;
1622   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1623   if (mbs > 0) {
1624     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1625     if (cpvalues == MAT_COPY_VALUES) {
1626       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1627     } else {
1628       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1629     }
1630   }
1631 
1632   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1633   c->sorted      = a->sorted;
1634   c->roworiented = a->roworiented;
1635   c->nonew       = a->nonew;
1636 
1637   if (a->diag) {
1638     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(c->diag);
1639     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1640     for ( i=0; i<mbs; i++ ) {
1641       c->diag[i] = a->diag[i];
1642     }
1643   } else c->diag        = 0;
1644   c->nz                 = a->nz;
1645   c->maxnz              = a->maxnz;
1646   c->solve_work         = 0;
1647   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1648   c->mult_work          = 0;
1649   *B = C;
1650   ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1651   PetscFunctionReturn(0);
1652 }
1653 
1654 #undef __FUNC__
1655 #define __FUNC__ "MatLoad_SeqBAIJ"
1656 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1657 {
1658   Mat_SeqBAIJ  *a;
1659   Mat          B;
1660   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1661   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1662   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1663   int          *masked, nmask,tmp,bs2,ishift;
1664   Scalar       *aa;
1665   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1666 
1667   PetscFunctionBegin;
1668   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1669   bs2  = bs*bs;
1670 
1671   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1672   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor");
1673   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1674   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1675   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object");
1676   M = header[1]; N = header[2]; nz = header[3];
1677 
1678   if (header[3] < 0) {
1679     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ");
1680   }
1681 
1682   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
1683 
1684   /*
1685      This code adds extra rows to make sure the number of rows is
1686     divisible by the blocksize
1687   */
1688   mbs        = M/bs;
1689   extra_rows = bs - M + bs*(mbs);
1690   if (extra_rows == bs) extra_rows = 0;
1691   else                  mbs++;
1692   if (extra_rows) {
1693     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1694   }
1695 
1696   /* read in row lengths */
1697   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1698   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1699   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1700 
1701   /* read in column indices */
1702   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) );CHKPTRQ(jj);
1703   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1704   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1705 
1706   /* loop over row lengths determining block row lengths */
1707   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1708   ierr        = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1709   mask        = (int *) PetscMalloc( 2*mbs*sizeof(int) );CHKPTRQ(mask);
1710   ierr        = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1711   masked      = mask + mbs;
1712   rowcount    = 0; nzcount = 0;
1713   for ( i=0; i<mbs; i++ ) {
1714     nmask = 0;
1715     for ( j=0; j<bs; j++ ) {
1716       kmax = rowlengths[rowcount];
1717       for ( k=0; k<kmax; k++ ) {
1718         tmp = jj[nzcount++]/bs;
1719         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1720       }
1721       rowcount++;
1722     }
1723     browlengths[i] += nmask;
1724     /* zero out the mask elements we set */
1725     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1726   }
1727 
1728   /* create our matrix */
1729   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1730   B = *A;
1731   a = (Mat_SeqBAIJ *) B->data;
1732 
1733   /* set matrix "i" values */
1734   a->i[0] = 0;
1735   for ( i=1; i<= mbs; i++ ) {
1736     a->i[i]      = a->i[i-1] + browlengths[i-1];
1737     a->ilen[i-1] = browlengths[i-1];
1738   }
1739   a->nz         = 0;
1740   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1741 
1742   /* read in nonzero values */
1743   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1744   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1745   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1746 
1747   /* set "a" and "j" values into matrix */
1748   nzcount = 0; jcount = 0;
1749   for ( i=0; i<mbs; i++ ) {
1750     nzcountb = nzcount;
1751     nmask    = 0;
1752     for ( j=0; j<bs; j++ ) {
1753       kmax = rowlengths[i*bs+j];
1754       for ( k=0; k<kmax; k++ ) {
1755         tmp = jj[nzcount++]/bs;
1756 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1757       }
1758       rowcount++;
1759     }
1760     /* sort the masked values */
1761     PetscSortInt(nmask,masked);
1762 
1763     /* set "j" values into matrix */
1764     maskcount = 1;
1765     for ( j=0; j<nmask; j++ ) {
1766       a->j[jcount++]  = masked[j];
1767       mask[masked[j]] = maskcount++;
1768     }
1769     /* set "a" values into matrix */
1770     ishift = bs2*a->i[i];
1771     for ( j=0; j<bs; j++ ) {
1772       kmax = rowlengths[i*bs+j];
1773       for ( k=0; k<kmax; k++ ) {
1774         tmp       = jj[nzcountb]/bs ;
1775         block     = mask[tmp] - 1;
1776         point     = jj[nzcountb] - bs*tmp;
1777         idx       = ishift + bs2*block + j + bs*point;
1778         a->a[idx] = aa[nzcountb++];
1779       }
1780     }
1781     /* zero out the mask elements we set */
1782     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1783   }
1784   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix");
1785 
1786   PetscFree(rowlengths);
1787   PetscFree(browlengths);
1788   PetscFree(aa);
1789   PetscFree(jj);
1790   PetscFree(mask);
1791 
1792   B->assembled = PETSC_TRUE;
1793 
1794   ierr = MatView_Private(B);CHKERRQ(ierr);
1795   PetscFunctionReturn(0);
1796 }
1797 
1798 
1799 
1800