xref: /petsc/src/mat/impls/baij/seq/baij.c (revision d5df8fcd928597e1dfb6d2cc8c38ce53138e9bfc)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: baij.c,v 1.149 1998/12/05 20:26:51 balay Exp balay $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "sys.h"
10 #include "src/mat/impls/baij/seq/baij.h"
11 #include "src/vec/vecimpl.h"
12 #include "src/inline/spops.h"
13 
14 #define CHUNKSIZE  10
15 
16 #undef __FUNC__
17 #define __FUNC__ "MatMarkDiag_SeqBAIJ"
18 int MatMarkDiag_SeqBAIJ(Mat A)
19 {
20   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
21   int         i,j, *diag, m = a->mbs;
22 
23   PetscFunctionBegin;
24   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
25   PLogObjectMemory(A,(m+1)*sizeof(int));
26   for ( i=0; i<m; i++ ) {
27     diag[i] = a->i[i+1];
28     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
29       if (a->j[j] == i) {
30         diag[i] = j;
31         break;
32       }
33     }
34   }
35   a->diag = diag;
36   PetscFunctionReturn(0);
37 }
38 
39 
40 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
41 
42 #undef __FUNC__
43 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
44 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
45                             PetscTruth *done)
46 {
47   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
48   int         ierr, n = a->mbs,i;
49 
50   PetscFunctionBegin;
51   *nn = n;
52   if (!ia) PetscFunctionReturn(0);
53   if (symmetric) {
54     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
55   } else if (oshift == 1) {
56     /* temporarily add 1 to i and j indices */
57     int nz = a->i[n] + 1;
58     for ( i=0; i<nz; i++ ) a->j[i]++;
59     for ( i=0; i<n+1; i++ ) a->i[i]++;
60     *ia = a->i; *ja = a->j;
61   } else {
62     *ia = a->i; *ja = a->j;
63   }
64 
65   PetscFunctionReturn(0);
66 }
67 
68 #undef __FUNC__
69 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
70 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
71                                 PetscTruth *done)
72 {
73   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
74   int         i,n = a->mbs;
75 
76   PetscFunctionBegin;
77   if (!ia) PetscFunctionReturn(0);
78   if (symmetric) {
79     PetscFree(*ia);
80     PetscFree(*ja);
81   } else if (oshift == 1) {
82     int nz = a->i[n];
83     for ( i=0; i<nz; i++ ) a->j[i]--;
84     for ( i=0; i<n+1; i++ ) a->i[i]--;
85   }
86   PetscFunctionReturn(0);
87 }
88 
89 #undef __FUNC__
90 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
91 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
92 {
93   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
94 
95   PetscFunctionBegin;
96   *bs = baij->bs;
97   PetscFunctionReturn(0);
98 }
99 
100 
101 #undef __FUNC__
102 #define __FUNC__ "MatDestroy_SeqBAIJ"
103 int MatDestroy_SeqBAIJ(Mat A)
104 {
105   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
106   int         ierr;
107 
108   if (--A->refct > 0) PetscFunctionReturn(0);
109 
110   if (A->mapping) {
111     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
112   }
113   if (A->bmapping) {
114     ierr = ISLocalToGlobalMappingDestroy(A->bmapping); CHKERRQ(ierr);
115   }
116   if (A->rmap) {
117     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
118   }
119   if (A->cmap) {
120     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
121   }
122 #if defined(USE_PETSC_LOG)
123   PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
124 #endif
125   PetscFree(a->a);
126   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
127   if (a->diag) PetscFree(a->diag);
128   if (a->ilen) PetscFree(a->ilen);
129   if (a->imax) PetscFree(a->imax);
130   if (a->solve_work) PetscFree(a->solve_work);
131   if (a->mult_work) PetscFree(a->mult_work);
132   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
133   PetscFree(a);
134   PLogObjectDestroy(A);
135   PetscHeaderDestroy(A);
136   PetscFunctionReturn(0);
137 }
138 
139 #undef __FUNC__
140 #define __FUNC__ "MatSetOption_SeqBAIJ"
141 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
142 {
143   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
144 
145   PetscFunctionBegin;
146   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
147   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
148   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
149   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
150   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
151   else if (op == MAT_NEW_NONZERO_LOCATION_ERROR)   a->nonew       = -1;
152   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew       = -2;
153   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
154   else if (op == MAT_ROWS_SORTED ||
155            op == MAT_ROWS_UNSORTED ||
156            op == MAT_SYMMETRIC ||
157            op == MAT_STRUCTURALLY_SYMMETRIC ||
158            op == MAT_YES_NEW_DIAGONALS ||
159            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
160            op == MAT_USE_HASH_TABLE) {
161     PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
162   } else if (op == MAT_NO_NEW_DIAGONALS) {
163     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
164   } else {
165     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 
171 #undef __FUNC__
172 #define __FUNC__ "MatGetSize_SeqBAIJ"
173 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
174 {
175   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
176 
177   PetscFunctionBegin;
178   if (m) *m = a->m;
179   if (n) *n = a->n;
180   PetscFunctionReturn(0);
181 }
182 
183 #undef __FUNC__
184 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
185 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
186 {
187   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
188 
189   PetscFunctionBegin;
190   *m = 0; *n = a->m;
191   PetscFunctionReturn(0);
192 }
193 
194 #undef __FUNC__
195 #define __FUNC__ "MatGetRow_SeqBAIJ"
196 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
197 {
198   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
199   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
200   Scalar      *aa,*v_i,*aa_i;
201 
202   PetscFunctionBegin;
203   bs  = a->bs;
204   ai  = a->i;
205   aj  = a->j;
206   aa  = a->a;
207   bs2 = a->bs2;
208 
209   if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range");
210 
211   bn  = row/bs;   /* Block number */
212   bp  = row % bs; /* Block Position */
213   M   = ai[bn+1] - ai[bn];
214   *nz = bs*M;
215 
216   if (v) {
217     *v = 0;
218     if (*nz) {
219       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
220       for ( i=0; i<M; i++ ) { /* for each block in the block row */
221         v_i  = *v + i*bs;
222         aa_i = aa + bs2*(ai[bn] + i);
223         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
224       }
225     }
226   }
227 
228   if (idx) {
229     *idx = 0;
230     if (*nz) {
231       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
232       for ( i=0; i<M; i++ ) { /* for each block in the block row */
233         idx_i = *idx + i*bs;
234         itmp  = bs*aj[ai[bn] + i];
235         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
236       }
237     }
238   }
239   PetscFunctionReturn(0);
240 }
241 
242 #undef __FUNC__
243 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
244 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
245 {
246   PetscFunctionBegin;
247   if (idx) {if (*idx) PetscFree(*idx);}
248   if (v)   {if (*v)   PetscFree(*v);}
249   PetscFunctionReturn(0);
250 }
251 
252 #undef __FUNC__
253 #define __FUNC__ "MatTranspose_SeqBAIJ"
254 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
255 {
256   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
257   Mat         C;
258   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
259   int         *rows,*cols,bs2=a->bs2;
260   Scalar      *array=a->a;
261 
262   PetscFunctionBegin;
263   if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place");
264   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
265   PetscMemzero(col,(1+nbs)*sizeof(int));
266 
267   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
268   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
269   PetscFree(col);
270   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
271   cols = rows + bs;
272   for ( i=0; i<mbs; i++ ) {
273     cols[0] = i*bs;
274     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
275     len = ai[i+1] - ai[i];
276     for ( j=0; j<len; j++ ) {
277       rows[0] = (*aj++)*bs;
278       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
279       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
280       array += bs2;
281     }
282   }
283   PetscFree(rows);
284 
285   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
286   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
287 
288   if (B != PETSC_NULL) {
289     *B = C;
290   } else {
291     PetscOps *Abops;
292     MatOps   Aops;
293 
294     /* This isn't really an in-place transpose */
295     PetscFree(a->a);
296     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
297     if (a->diag) PetscFree(a->diag);
298     if (a->ilen) PetscFree(a->ilen);
299     if (a->imax) PetscFree(a->imax);
300     if (a->solve_work) PetscFree(a->solve_work);
301     PetscFree(a);
302 
303 
304     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
305     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
306 
307     /*
308        This is horrible, horrible code. We need to keep the
309       A pointers for the bops and ops but copy everything
310       else from C.
311     */
312     Abops    = A->bops;
313     Aops     = A->ops;
314     PetscMemcpy(A,C,sizeof(struct _p_Mat));
315     A->bops  = Abops;
316     A->ops   = Aops;
317     A->qlist = 0;
318 
319     PetscHeaderDestroy(C);
320   }
321   PetscFunctionReturn(0);
322 }
323 
324 
325 
326 
327 #undef __FUNC__
328 #define __FUNC__ "MatView_SeqBAIJ_Binary"
329 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
330 {
331   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
332   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
333   Scalar      *aa;
334 
335   PetscFunctionBegin;
336   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
337   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
338   col_lens[0] = MAT_COOKIE;
339 
340   col_lens[1] = a->m;
341   col_lens[2] = a->n;
342   col_lens[3] = a->nz*bs2;
343 
344   /* store lengths of each row and write (including header) to file */
345   count = 0;
346   for ( i=0; i<a->mbs; i++ ) {
347     for ( j=0; j<bs; j++ ) {
348       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
349     }
350   }
351   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1); CHKERRQ(ierr);
352   PetscFree(col_lens);
353 
354   /* store column indices (zero start index) */
355   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj);
356   count = 0;
357   for ( i=0; i<a->mbs; i++ ) {
358     for ( j=0; j<bs; j++ ) {
359       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
360         for ( l=0; l<bs; l++ ) {
361           jj[count++] = bs*a->j[k] + l;
362         }
363       }
364     }
365   }
366   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0); CHKERRQ(ierr);
367   PetscFree(jj);
368 
369   /* store nonzero values */
370   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa);
371   count = 0;
372   for ( i=0; i<a->mbs; i++ ) {
373     for ( j=0; j<bs; j++ ) {
374       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
375         for ( l=0; l<bs; l++ ) {
376           aa[count++] = a->a[bs2*k + l*bs + j];
377         }
378       }
379     }
380   }
381   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0); CHKERRQ(ierr);
382   PetscFree(aa);
383   PetscFunctionReturn(0);
384 }
385 
386 #undef __FUNC__
387 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
388 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
389 {
390   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
391   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
392   FILE        *fd;
393   char        *outputname;
394 
395   PetscFunctionBegin;
396   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
397   ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr);
398   ierr = ViewerGetFormat(viewer,&format);
399   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
400     fprintf(fd,"  block size is %d\n",bs);
401   }
402   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
403     SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported");
404   }
405   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
406     for ( i=0; i<a->mbs; i++ ) {
407       for ( j=0; j<bs; j++ ) {
408         fprintf(fd,"row %d:",i*bs+j);
409         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
410           for ( l=0; l<bs; l++ ) {
411 #if defined(USE_PETSC_COMPLEX)
412           if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0)
413             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
414               PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));
415           else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0)
416             fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
417               PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));
418           else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0)
419             fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));
420 #else
421           if (a->a[bs2*k + l*bs + j] != 0.0)
422             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
423 #endif
424           }
425         }
426         fprintf(fd,"\n");
427       }
428     }
429   }
430   else {
431     for ( i=0; i<a->mbs; i++ ) {
432       for ( j=0; j<bs; j++ ) {
433         fprintf(fd,"row %d:",i*bs+j);
434         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
435           for ( l=0; l<bs; l++ ) {
436 #if defined(USE_PETSC_COMPLEX)
437           if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) {
438             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
439               PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));
440           }
441           else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) {
442             fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
443               PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));
444           }
445           else {
446             fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));
447           }
448 #else
449             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
450 #endif
451           }
452         }
453         fprintf(fd,"\n");
454       }
455     }
456   }
457   fflush(fd);
458   PetscFunctionReturn(0);
459 }
460 
461 #undef __FUNC__
462 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom"
463 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa)
464 {
465   Mat          A = (Mat) Aa;
466   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
467   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
468   double       xl,yl,xr,yr,x_l,x_r,y_l,y_r;
469   Scalar       *aa;
470   MPI_Comm     comm;
471   Viewer       viewer;
472 
473   PetscFunctionBegin;
474   /*
475       This is nasty. If this is called from an originally parallel matrix
476    then all processes call this, but only the first has the matrix so the
477    rest should return immediately.
478   */
479   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
480   MPI_Comm_rank(comm,&rank);
481   if (rank) PetscFunctionReturn(0);
482 
483   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr);
484 
485   ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr); CHKERRQ(ierr);
486 
487   /* loop over matrix elements drawing boxes */
488   color = DRAW_BLUE;
489   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
490     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
491       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
492       x_l = a->j[j]*bs; x_r = x_l + 1.0;
493       aa = a->a + j*bs2;
494       for ( k=0; k<bs; k++ ) {
495         for ( l=0; l<bs; l++ ) {
496           if (PetscReal(*aa++) >=  0.) continue;
497           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
498         }
499       }
500     }
501   }
502   color = DRAW_CYAN;
503   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
504     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
505       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
506       x_l = a->j[j]*bs; x_r = x_l + 1.0;
507       aa = a->a + j*bs2;
508       for ( k=0; k<bs; k++ ) {
509         for ( l=0; l<bs; l++ ) {
510           if (PetscReal(*aa++) != 0.) continue;
511           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
512         }
513       }
514     }
515   }
516 
517   color = DRAW_RED;
518   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
519     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
520       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
521       x_l = a->j[j]*bs; x_r = x_l + 1.0;
522       aa = a->a + j*bs2;
523       for ( k=0; k<bs; k++ ) {
524         for ( l=0; l<bs; l++ ) {
525           if (PetscReal(*aa++) <= 0.) continue;
526           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
527         }
528       }
529     }
530   }
531   PetscFunctionReturn(0);
532 }
533 
534 #undef __FUNC__
535 #define __FUNC__ "MatView_SeqBAIJ_Draw"
536 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
537 {
538   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
539   int          ierr;
540   double       xl,yl,xr,yr,w,h;
541   Draw         draw;
542   PetscTruth   isnull;
543 
544   PetscFunctionBegin;
545 
546   ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
547   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
548 
549   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
550   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
551   xr += w;    yr += h;  xl = -w;     yl = -h;
552   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
553   ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A); CHKERRQ(ierr);
554   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
555   PetscFunctionReturn(0);
556 }
557 
558 #undef __FUNC__
559 #define __FUNC__ "MatView_SeqBAIJ"
560 int MatView_SeqBAIJ(Mat A,Viewer viewer)
561 {
562   ViewerType  vtype;
563   int         ierr;
564 
565   PetscFunctionBegin;
566   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
567   if (!PetscStrcmp(vtype,MATLAB_VIEWER)) {
568     SETERRQ(PETSC_ERR_SUP,0,"Matlab viewer not supported");
569   } else if (!PetscStrcmp(vtype,ASCII_VIEWER)){
570     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
571   } else if (!PetscStrcmp(vtype,BINARY_VIEWER)) {
572     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
573   } else if (!PetscStrcmp(vtype,DRAW_VIEWER)) {
574     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
575   } else {
576     SETERRQ(1,1,"Viewer type not supported by PETSc object");
577   }
578   PetscFunctionReturn(0);
579 }
580 
581 
582 #undef __FUNC__
583 #define __FUNC__ "MatGetValues_SeqBAIJ"
584 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
585 {
586   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
587   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
588   int        *ai = a->i, *ailen = a->ilen;
589   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
590   Scalar     *ap, *aa = a->a, zero = 0.0;
591 
592   PetscFunctionBegin;
593   for ( k=0; k<m; k++ ) { /* loop over rows */
594     row  = im[k]; brow = row/bs;
595     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
596     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
597     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
598     nrow = ailen[brow];
599     for ( l=0; l<n; l++ ) { /* loop over columns */
600       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
601       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
602       col  = in[l] ;
603       bcol = col/bs;
604       cidx = col%bs;
605       ridx = row%bs;
606       high = nrow;
607       low  = 0; /* assume unsorted */
608       while (high-low > 5) {
609         t = (low+high)/2;
610         if (rp[t] > bcol) high = t;
611         else             low  = t;
612       }
613       for ( i=low; i<high; i++ ) {
614         if (rp[i] > bcol) break;
615         if (rp[i] == bcol) {
616           *v++ = ap[bs2*i+bs*cidx+ridx];
617           goto finished;
618         }
619       }
620       *v++ = zero;
621       finished:;
622     }
623   }
624   PetscFunctionReturn(0);
625 }
626 
627 
628 #undef __FUNC__
629 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
630 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
631 {
632   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
633   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
634   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented;
635   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval;
636   Scalar      *ap,*value=v,*aa=a->a,*bap;
637 
638   PetscFunctionBegin;
639   if (roworiented) {
640     stepval = (n-1)*bs;
641   } else {
642     stepval = (m-1)*bs;
643   }
644   for ( k=0; k<m; k++ ) { /* loop over added rows */
645     row  = im[k];
646 #if defined(USE_PETSC_BOPT_g)
647     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
648     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
649 #endif
650     rp   = aj + ai[row];
651     ap   = aa + bs2*ai[row];
652     rmax = imax[row];
653     nrow = ailen[row];
654     low  = 0;
655     for ( l=0; l<n; l++ ) { /* loop over added columns */
656 #if defined(USE_PETSC_BOPT_g)
657       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
658       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
659 #endif
660       col = in[l];
661       if (roworiented) {
662         value = v + k*(stepval+bs)*bs + l*bs;
663       } else {
664         value = v + l*(stepval+bs)*bs + k*bs;
665       }
666       if (!sorted) low = 0; high = nrow;
667       while (high-low > 7) {
668         t = (low+high)/2;
669         if (rp[t] > col) high = t;
670         else             low  = t;
671       }
672       for ( i=low; i<high; i++ ) {
673         if (rp[i] > col) break;
674         if (rp[i] == col) {
675           bap  = ap +  bs2*i;
676           if (roworiented) {
677             if (is == ADD_VALUES) {
678               for ( ii=0; ii<bs; ii++,value+=stepval ) {
679                 for (jj=ii; jj<bs2; jj+=bs ) {
680                   bap[jj] += *value++;
681                 }
682               }
683             } else {
684               for ( ii=0; ii<bs; ii++,value+=stepval ) {
685                 for (jj=ii; jj<bs2; jj+=bs ) {
686                   bap[jj] = *value++;
687                 }
688               }
689             }
690           } else {
691             if (is == ADD_VALUES) {
692               for ( ii=0; ii<bs; ii++,value+=stepval ) {
693                 for (jj=0; jj<bs; jj++ ) {
694                   *bap++ += *value++;
695                 }
696               }
697             } else {
698               for ( ii=0; ii<bs; ii++,value+=stepval ) {
699                 for (jj=0; jj<bs; jj++ ) {
700                   *bap++  = *value++;
701                 }
702               }
703             }
704           }
705           goto noinsert2;
706         }
707       }
708       if (nonew == 1) goto noinsert2;
709       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
710       if (nrow >= rmax) {
711         /* there is no extra room in row, therefore enlarge */
712         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
713         Scalar *new_a;
714 
715         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
716 
717         /* malloc new storage space */
718         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
719         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
720         new_j   = (int *) (new_a + bs2*new_nz);
721         new_i   = new_j + new_nz;
722 
723         /* copy over old data into new slots */
724         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
725         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
726         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
727         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
728         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
729         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar));
730         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
731         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar));
732         /* free up old matrix storage */
733         PetscFree(a->a);
734         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
735         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
736         a->singlemalloc = 1;
737 
738         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
739         rmax = imax[row] = imax[row] + CHUNKSIZE;
740         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
741         a->maxnz += bs2*CHUNKSIZE;
742         a->reallocs++;
743         a->nz++;
744       }
745       N = nrow++ - 1;
746       /* shift up all the later entries in this row */
747       for ( ii=N; ii>=i; ii-- ) {
748         rp[ii+1] = rp[ii];
749         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
750       }
751       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
752       rp[i] = col;
753       bap   = ap +  bs2*i;
754       if (roworiented) {
755         for ( ii=0; ii<bs; ii++,value+=stepval) {
756           for (jj=ii; jj<bs2; jj+=bs ) {
757             bap[jj] = *value++;
758           }
759         }
760       } else {
761         for ( ii=0; ii<bs; ii++,value+=stepval ) {
762           for (jj=0; jj<bs; jj++ ) {
763             *bap++  = *value++;
764           }
765         }
766       }
767       noinsert2:;
768       low = i;
769     }
770     ailen[row] = nrow;
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 
776 #undef __FUNC__
777 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
778 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
779 {
780   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
781   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
782   int        m = a->m,*ip, N, *ailen = a->ilen;
783   int        mbs = a->mbs, bs2 = a->bs2,rmax = 0;
784   Scalar     *aa = a->a, *ap;
785 
786   PetscFunctionBegin;
787   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
788 
789   if (m) rmax = ailen[0];
790   for ( i=1; i<mbs; i++ ) {
791     /* move each row back by the amount of empty slots (fshift) before it*/
792     fshift += imax[i-1] - ailen[i-1];
793     rmax   = PetscMax(rmax,ailen[i]);
794     if (fshift) {
795       ip = aj + ai[i]; ap = aa + bs2*ai[i];
796       N = ailen[i];
797       for ( j=0; j<N; j++ ) {
798         ip[j-fshift] = ip[j];
799         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
800       }
801     }
802     ai[i] = ai[i-1] + ailen[i-1];
803   }
804   if (mbs) {
805     fshift += imax[mbs-1] - ailen[mbs-1];
806     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
807   }
808   /* reset ilen and imax for each row */
809   for ( i=0; i<mbs; i++ ) {
810     ailen[i] = imax[i] = ai[i+1] - ai[i];
811   }
812   a->nz = ai[mbs];
813 
814   /* diagonals may have moved, so kill the diagonal pointers */
815   if (fshift && a->diag) {
816     PetscFree(a->diag);
817     PLogObjectMemory(A,-(m+1)*sizeof(int));
818     a->diag = 0;
819   }
820   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
821            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
822   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
823            a->reallocs);
824   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
825   a->reallocs          = 0;
826   A->info.nz_unneeded  = (double)fshift*bs2;
827 
828   PetscFunctionReturn(0);
829 }
830 
831 
832 /* idx should be of length atlease bs */
833 #undef __FUNC__
834 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
835 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
836 {
837   int i,row;
838 
839   PetscFunctionBegin;
840   row = idx[0];
841   if (row%bs!=0) { *flg = PETSC_FALSE; PetscFunctionReturn(0); }
842 
843   for ( i=1; i<bs; i++ ) {
844     if (row+i != idx[i]) { *flg = PETSC_FALSE; PetscFunctionReturn(0); }
845   }
846   *flg = PETSC_TRUE;
847   PetscFunctionReturn(0);
848 }
849 
850 #undef __FUNC__
851 #define __FUNC__ "MatZeroRows_SeqBAIJ"
852 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
853 {
854   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
855   IS          is_local;
856   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
857   PetscTruth  flg;
858   Scalar      zero = 0.0,*aa;
859 
860   PetscFunctionBegin;
861   /* Make a copy of the IS and  sort it */
862   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
863   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
864   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
865   ierr = ISSort(is_local); CHKERRQ(ierr);
866   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
867 
868   i = 0;
869   while (i < is_n) {
870     if (rows[i]<0 || rows[i]>m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"row out of range");
871     flg = PETSC_FALSE;
872     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
873     count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
874     aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
875     if (flg) { /* There exists a block of rows to be Zerowed */
876       if (baij->ilen[rows[i]/bs] > 0) {
877         PetscMemzero(aa,count*bs*sizeof(Scalar));
878         baij->ilen[rows[i]/bs] = 1;
879         baij->j[baij->i[rows[i]/bs]] = rows[i]/bs;
880       }
881       i += bs;
882     } else { /* Zero out only the requested row */
883       for ( j=0; j<count; j++ ) {
884         aa[0] = zero;
885         aa+=bs;
886       }
887       i++;
888     }
889   }
890   if (diag) {
891     for ( j=0; j<is_n; j++ ) {
892       ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
893       /* ierr = MatSetValues(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); */
894     }
895   }
896   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
897   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
898   ierr = ISDestroy(is_local); CHKERRQ(ierr);
899   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
900   PetscFunctionReturn(0);
901 }
902 
903 #undef __FUNC__
904 #define __FUNC__ "MatSetValues_SeqBAIJ"
905 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
906 {
907   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
908   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
909   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
910   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
911   int          ridx,cidx,bs2=a->bs2;
912   Scalar      *ap,value,*aa=a->a,*bap;
913 
914   PetscFunctionBegin;
915   for ( k=0; k<m; k++ ) { /* loop over added rows */
916     row  = im[k]; brow = row/bs;
917 #if defined(USE_PETSC_BOPT_g)
918     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
919     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
920 #endif
921     rp   = aj + ai[brow];
922     ap   = aa + bs2*ai[brow];
923     rmax = imax[brow];
924     nrow = ailen[brow];
925     low  = 0;
926     for ( l=0; l<n; l++ ) { /* loop over added columns */
927 #if defined(USE_PETSC_BOPT_g)
928       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
929       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
930 #endif
931       col = in[l]; bcol = col/bs;
932       ridx = row % bs; cidx = col % bs;
933       if (roworiented) {
934         value = *v++;
935       } else {
936         value = v[k + l*m];
937       }
938       if (!sorted) low = 0; high = nrow;
939       while (high-low > 7) {
940         t = (low+high)/2;
941         if (rp[t] > bcol) high = t;
942         else              low  = t;
943       }
944       for ( i=low; i<high; i++ ) {
945         if (rp[i] > bcol) break;
946         if (rp[i] == bcol) {
947           bap  = ap +  bs2*i + bs*cidx + ridx;
948           if (is == ADD_VALUES) *bap += value;
949           else                  *bap  = value;
950           goto noinsert1;
951         }
952       }
953       if (nonew == 1) goto noinsert1;
954       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
955       if (nrow >= rmax) {
956         /* there is no extra room in row, therefore enlarge */
957         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
958         Scalar *new_a;
959 
960         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
961 
962         /* Malloc new storage space */
963         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
964         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
965         new_j   = (int *) (new_a + bs2*new_nz);
966         new_i   = new_j + new_nz;
967 
968         /* copy over old data into new slots */
969         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
970         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
971         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
972         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
973         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
974                                                            len*sizeof(int));
975         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
976         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
977         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
978                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
979         /* free up old matrix storage */
980         PetscFree(a->a);
981         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
982         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
983         a->singlemalloc = 1;
984 
985         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
986         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
987         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
988         a->maxnz += bs2*CHUNKSIZE;
989         a->reallocs++;
990         a->nz++;
991       }
992       N = nrow++ - 1;
993       /* shift up all the later entries in this row */
994       for ( ii=N; ii>=i; ii-- ) {
995         rp[ii+1] = rp[ii];
996         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
997       }
998       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
999       rp[i]                      = bcol;
1000       ap[bs2*i + bs*cidx + ridx] = value;
1001       noinsert1:;
1002       low = i;
1003     }
1004     ailen[brow] = nrow;
1005   }
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1010 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1011 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1012 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatGetSubMatrixCall,Mat*);
1013 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1014 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec);
1015 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1016 extern int MatScale_SeqBAIJ(Scalar*,Mat);
1017 extern int MatNorm_SeqBAIJ(Mat,NormType,double *);
1018 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*);
1019 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1020 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1021 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1022 extern int MatZeroEntries_SeqBAIJ(Mat);
1023 
1024 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1025 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1026 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1027 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1028 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1029 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1030 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1031 
1032 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1033 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1034 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1035 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1036 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1037 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1038 
1039 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1040 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1041 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1042 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1043 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1044 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1045 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1046 
1047 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1048 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1049 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1050 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1051 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1052 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1053 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1054 
1055 #undef __FUNC__
1056 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1057 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1058 {
1059   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1060   Mat         outA;
1061   int         ierr;
1062   PetscTruth  row_identity, col_identity;
1063 
1064   PetscFunctionBegin;
1065   if (fill != 0) SETERRQ(PETSC_ERR_SUP,0,"Only fill=0 supported");
1066   ierr = ISIdentity(row,&row_identity); CHKERRQ(ierr);
1067   ierr = ISIdentity(col,&col_identity); CHKERRQ(ierr);
1068   if (!row_identity || !col_identity) {
1069     SETERRQ(1,1,"Row and column permutations must be identity");
1070   }
1071 
1072   outA          = inA;
1073   inA->factor   = FACTOR_LU;
1074   a->row        = row;
1075   a->col        = col;
1076 
1077   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1078   ierr = ISInvertPermutation(col,&(a->icol)); CHKERRQ(ierr);
1079   PLogObjectParent(inA,a->icol);
1080 
1081   if (!a->solve_work) {
1082     a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1083     PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1084   }
1085 
1086   if (!a->diag) {
1087     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1088   }
1089   /*
1090       Blocksize 4 and 5 have a special faster factorization/solver for ILU(0) factorization
1091     with natural ordering
1092   */
1093   if (a->bs == 4) {
1094     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1095     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1096     PLogInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");
1097   } else if (a->bs == 5) {
1098     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1099     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1100     PLogInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");
1101   }
1102 
1103   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1104 
1105 
1106   PetscFunctionReturn(0);
1107 }
1108 #undef __FUNC__
1109 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1110 int MatPrintHelp_SeqBAIJ(Mat A)
1111 {
1112   static int called = 0;
1113   MPI_Comm   comm = A->comm;
1114 
1115   PetscFunctionBegin;
1116   if (called) {PetscFunctionReturn(0);} else called = 1;
1117   (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1118   (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 EXTERN_C_BEGIN
1123 #undef __FUNC__
1124 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1125 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1126 {
1127   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1128   int         i,nz,n;
1129 
1130   PetscFunctionBegin;
1131   nz = baij->maxnz;
1132   n  = baij->n;
1133   for (i=0; i<nz; i++) {
1134     baij->j[i] = indices[i];
1135   }
1136   baij->nz = nz;
1137   for ( i=0; i<n; i++ ) {
1138     baij->ilen[i] = baij->imax[i];
1139   }
1140 
1141   PetscFunctionReturn(0);
1142 }
1143 EXTERN_C_END
1144 
1145 #undef __FUNC__
1146 #define __FUNC__ "MatSeqBAIJSetColumnIndices"
1147 /*@
1148     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1149        in the matrix.
1150 
1151   Input Parameters:
1152 +  mat - the SeqBAIJ matrix
1153 -  indices - the column indices
1154 
1155   Notes:
1156     This can be called if you have precomputed the nonzero structure of the
1157   matrix and want to provide it to the matrix object to improve the performance
1158   of the MatSetValues() operation.
1159 
1160     You MUST have set the correct numbers of nonzeros per row in the call to
1161   MatCreateSeqBAIJ().
1162 
1163     MUST be called before any calls to MatSetValues();
1164 
1165 @*/
1166 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1167 {
1168   int ierr,(*f)(Mat,int *);
1169 
1170   PetscFunctionBegin;
1171   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1172   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);
1173          CHKERRQ(ierr);
1174   if (f) {
1175     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1176   } else {
1177     SETERRQ(1,1,"Wrong type of matrix to set column indices");
1178   }
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 /* -------------------------------------------------------------------*/
1183 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1184        MatGetRow_SeqBAIJ,
1185        MatRestoreRow_SeqBAIJ,
1186        MatMult_SeqBAIJ_N,
1187        MatMultAdd_SeqBAIJ_N,
1188        MatMultTrans_SeqBAIJ,
1189        MatMultTransAdd_SeqBAIJ,
1190        MatSolve_SeqBAIJ_N,
1191        0,
1192        0,
1193        0,
1194        MatLUFactor_SeqBAIJ,
1195        0,
1196        0,
1197        MatTranspose_SeqBAIJ,
1198        MatGetInfo_SeqBAIJ,
1199        MatEqual_SeqBAIJ,
1200        MatGetDiagonal_SeqBAIJ,
1201        MatDiagonalScale_SeqBAIJ,
1202        MatNorm_SeqBAIJ,
1203        0,
1204        MatAssemblyEnd_SeqBAIJ,
1205        0,
1206        MatSetOption_SeqBAIJ,
1207        MatZeroEntries_SeqBAIJ,
1208        MatZeroRows_SeqBAIJ,
1209        MatLUFactorSymbolic_SeqBAIJ,
1210        MatLUFactorNumeric_SeqBAIJ_N,
1211        0,
1212        0,
1213        MatGetSize_SeqBAIJ,
1214        MatGetSize_SeqBAIJ,
1215        MatGetOwnershipRange_SeqBAIJ,
1216        MatILUFactorSymbolic_SeqBAIJ,
1217        0,
1218        0,
1219        0,
1220        MatDuplicate_SeqBAIJ,
1221        0,
1222        0,
1223        MatILUFactor_SeqBAIJ,
1224        0,
1225        0,
1226        MatGetSubMatrices_SeqBAIJ,
1227        MatIncreaseOverlap_SeqBAIJ,
1228        MatGetValues_SeqBAIJ,
1229        0,
1230        MatPrintHelp_SeqBAIJ,
1231        MatScale_SeqBAIJ,
1232        0,
1233        0,
1234        0,
1235        MatGetBlockSize_SeqBAIJ,
1236        MatGetRowIJ_SeqBAIJ,
1237        MatRestoreRowIJ_SeqBAIJ,
1238        0,
1239        0,
1240        0,
1241        0,
1242        0,
1243        0,
1244        MatSetValuesBlocked_SeqBAIJ,
1245        MatGetSubMatrix_SeqBAIJ,
1246        0,
1247        0,
1248        MatGetMaps_Petsc};
1249 
1250 #undef __FUNC__
1251 #define __FUNC__ "MatCreateSeqBAIJ"
1252 /*@C
1253    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1254    compressed row) format.  For good matrix assembly performance the
1255    user should preallocate the matrix storage by setting the parameter nz
1256    (or the array nzz).  By setting these parameters accurately, performance
1257    during matrix assembly can be increased by more than a factor of 50.
1258 
1259    Collective on MPI_Comm
1260 
1261    Input Parameters:
1262 +  comm - MPI communicator, set to PETSC_COMM_SELF
1263 .  bs - size of block
1264 .  m - number of rows
1265 .  n - number of columns
1266 .  nz - number of block nonzeros per block row (same for all rows)
1267 -  nzz - array containing the number of block nonzeros in the various block rows
1268          (possibly different for each block row) or PETSC_NULL
1269 
1270    Output Parameter:
1271 .  A - the matrix
1272 
1273    Options Database Keys:
1274 .   -mat_no_unroll - uses code that does not unroll the loops in the
1275                      block calculations (much slower)
1276 .    -mat_block_size - size of the blocks to use
1277 
1278    Notes:
1279    The block AIJ format is fully compatible with standard Fortran 77
1280    storage.  That is, the stored row and column indices can begin at
1281    either one (as in Fortran) or zero.  See the users' manual for details.
1282 
1283    Specify the preallocated storage with either nz or nnz (not both).
1284    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1285    allocation.  For additional details, see the users manual chapter on
1286    matrices.
1287 
1288 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1289 @*/
1290 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1291 {
1292   Mat         B;
1293   Mat_SeqBAIJ *b;
1294   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
1295 
1296   PetscFunctionBegin;
1297   MPI_Comm_size(comm,&size);
1298   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1");
1299 
1300   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1301 
1302   if (mbs*bs!=m || nbs*bs!=n) {
1303     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize");
1304   }
1305 
1306   *A = 0;
1307   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,comm,MatDestroy,MatView);
1308   PLogObjectCreate(B);
1309   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1310   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1311   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1312   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1313   if (!flg) {
1314     switch (bs) {
1315     case 1:
1316       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1317       B->ops->solve           = MatSolve_SeqBAIJ_1;
1318       B->ops->mult            = MatMult_SeqBAIJ_1;
1319       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1320       break;
1321     case 2:
1322       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1323       B->ops->solve           = MatSolve_SeqBAIJ_2;
1324       B->ops->mult            = MatMult_SeqBAIJ_2;
1325       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1326       break;
1327     case 3:
1328       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1329       B->ops->solve           = MatSolve_SeqBAIJ_3;
1330       B->ops->mult            = MatMult_SeqBAIJ_3;
1331       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1332       break;
1333     case 4:
1334       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1335       B->ops->solve           = MatSolve_SeqBAIJ_4;
1336       B->ops->mult            = MatMult_SeqBAIJ_4;
1337       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1338       break;
1339     case 5:
1340       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1341       B->ops->solve           = MatSolve_SeqBAIJ_5;
1342       B->ops->mult            = MatMult_SeqBAIJ_5;
1343       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1344       break;
1345     case 7:
1346       B->ops->mult            = MatMult_SeqBAIJ_7;
1347       B->ops->solve           = MatSolve_SeqBAIJ_7;
1348       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1349       break;
1350     }
1351   }
1352   B->ops->destroy     = MatDestroy_SeqBAIJ;
1353   B->ops->view        = MatView_SeqBAIJ;
1354   B->factor           = 0;
1355   B->lupivotthreshold = 1.0;
1356   B->mapping          = 0;
1357   b->row              = 0;
1358   b->col              = 0;
1359   b->icol             = 0;
1360   b->reallocs         = 0;
1361 
1362   b->m       = m; B->m = m; B->M = m;
1363   b->n       = n; B->n = n; B->N = n;
1364 
1365   ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr);
1366   ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr);
1367 
1368   b->mbs     = mbs;
1369   b->nbs     = nbs;
1370   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1371   if (nnz == PETSC_NULL) {
1372     if (nz == PETSC_DEFAULT) nz = 5;
1373     else if (nz <= 0)        nz = 1;
1374     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1375     nz = nz*mbs;
1376   } else {
1377     nz = 0;
1378     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1379   }
1380 
1381   /* allocate the matrix space */
1382   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1383   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1384   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1385   b->j  = (int *) (b->a + nz*bs2);
1386   PetscMemzero(b->j,nz*sizeof(int));
1387   b->i  = b->j + nz;
1388   b->singlemalloc = 1;
1389 
1390   b->i[0] = 0;
1391   for (i=1; i<mbs+1; i++) {
1392     b->i[i] = b->i[i-1] + b->imax[i-1];
1393   }
1394 
1395   /* b->ilen will count nonzeros in each block row so far. */
1396   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1397   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1398   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1399 
1400   b->bs               = bs;
1401   b->bs2              = bs2;
1402   b->mbs              = mbs;
1403   b->nz               = 0;
1404   b->maxnz            = nz*bs2;
1405   b->sorted           = 0;
1406   b->roworiented      = 1;
1407   b->nonew            = 0;
1408   b->diag             = 0;
1409   b->solve_work       = 0;
1410   b->mult_work        = 0;
1411   b->spptr            = 0;
1412   B->info.nz_unneeded = (double)b->maxnz;
1413 
1414   *A = B;
1415   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1416   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1417 
1418   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1419                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1420                                      (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1421 
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 #undef __FUNC__
1426 #define __FUNC__ "MatDuplicate_SeqBAIJ"
1427 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1428 {
1429   Mat         C;
1430   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1431   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1432 
1433   PetscFunctionBegin;
1434   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix");
1435 
1436   *B = 0;
1437   PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,A->comm,MatDestroy,MatView);
1438   PLogObjectCreate(C);
1439   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1440   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
1441   C->ops->destroy    = MatDestroy_SeqBAIJ;
1442   C->ops->view       = MatView_SeqBAIJ;
1443   C->factor          = A->factor;
1444   c->row             = 0;
1445   c->col             = 0;
1446   C->assembled       = PETSC_TRUE;
1447 
1448   c->m = C->m   = a->m;
1449   c->n = C->n   = a->n;
1450   C->M          = a->m;
1451   C->N          = a->n;
1452 
1453   c->bs         = a->bs;
1454   c->bs2        = a->bs2;
1455   c->mbs        = a->mbs;
1456   c->nbs        = a->nbs;
1457 
1458   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1459   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1460   for ( i=0; i<mbs; i++ ) {
1461     c->imax[i] = a->imax[i];
1462     c->ilen[i] = a->ilen[i];
1463   }
1464 
1465   /* allocate the matrix space */
1466   c->singlemalloc = 1;
1467   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1468   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1469   c->j  = (int *) (c->a + nz*bs2);
1470   c->i  = c->j + nz;
1471   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1472   if (mbs > 0) {
1473     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1474     if (cpvalues == MAT_COPY_VALUES) {
1475       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1476     } else {
1477       PetscMemzero(c->a,bs2*nz*sizeof(Scalar));
1478     }
1479   }
1480 
1481   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1482   c->sorted      = a->sorted;
1483   c->roworiented = a->roworiented;
1484   c->nonew       = a->nonew;
1485 
1486   if (a->diag) {
1487     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1488     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1489     for ( i=0; i<mbs; i++ ) {
1490       c->diag[i] = a->diag[i];
1491     }
1492   }
1493   else c->diag          = 0;
1494   c->nz                 = a->nz;
1495   c->maxnz              = a->maxnz;
1496   c->solve_work         = 0;
1497   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1498   c->mult_work          = 0;
1499   *B = C;
1500   ierr = PetscObjectComposeFunction((PetscObject)C,"MatSeqBAIJSetColumnIndices_C",
1501                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1502                                      (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 #undef __FUNC__
1507 #define __FUNC__ "MatLoad_SeqBAIJ"
1508 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1509 {
1510   Mat_SeqBAIJ  *a;
1511   Mat          B;
1512   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1513   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1514   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1515   int          *masked, nmask,tmp,bs2,ishift;
1516   Scalar       *aa;
1517   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1518 
1519   PetscFunctionBegin;
1520   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1521   bs2  = bs*bs;
1522 
1523   MPI_Comm_size(comm,&size);
1524   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor");
1525   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1526   ierr = PetscBinaryRead(fd,header,4,PETSC_INT); CHKERRQ(ierr);
1527   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object");
1528   M = header[1]; N = header[2]; nz = header[3];
1529 
1530   if (header[3] < 0) {
1531     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ");
1532   }
1533 
1534   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
1535 
1536   /*
1537      This code adds extra rows to make sure the number of rows is
1538     divisible by the blocksize
1539   */
1540   mbs        = M/bs;
1541   extra_rows = bs - M + bs*(mbs);
1542   if (extra_rows == bs) extra_rows = 0;
1543   else                  mbs++;
1544   if (extra_rows) {
1545     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1546   }
1547 
1548   /* read in row lengths */
1549   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1550   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1551   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1552 
1553   /* read in column indices */
1554   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1555   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT); CHKERRQ(ierr);
1556   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1557 
1558   /* loop over row lengths determining block row lengths */
1559   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1560   PetscMemzero(browlengths,mbs*sizeof(int));
1561   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1562   PetscMemzero(mask,mbs*sizeof(int));
1563   masked = mask + mbs;
1564   rowcount = 0; nzcount = 0;
1565   for ( i=0; i<mbs; i++ ) {
1566     nmask = 0;
1567     for ( j=0; j<bs; j++ ) {
1568       kmax = rowlengths[rowcount];
1569       for ( k=0; k<kmax; k++ ) {
1570         tmp = jj[nzcount++]/bs;
1571         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1572       }
1573       rowcount++;
1574     }
1575     browlengths[i] += nmask;
1576     /* zero out the mask elements we set */
1577     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1578   }
1579 
1580   /* create our matrix */
1581   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1582   B = *A;
1583   a = (Mat_SeqBAIJ *) B->data;
1584 
1585   /* set matrix "i" values */
1586   a->i[0] = 0;
1587   for ( i=1; i<= mbs; i++ ) {
1588     a->i[i]      = a->i[i-1] + browlengths[i-1];
1589     a->ilen[i-1] = browlengths[i-1];
1590   }
1591   a->nz         = 0;
1592   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1593 
1594   /* read in nonzero values */
1595   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1596   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR); CHKERRQ(ierr);
1597   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1598 
1599   /* set "a" and "j" values into matrix */
1600   nzcount = 0; jcount = 0;
1601   for ( i=0; i<mbs; i++ ) {
1602     nzcountb = nzcount;
1603     nmask    = 0;
1604     for ( j=0; j<bs; j++ ) {
1605       kmax = rowlengths[i*bs+j];
1606       for ( k=0; k<kmax; k++ ) {
1607         tmp = jj[nzcount++]/bs;
1608 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1609       }
1610       rowcount++;
1611     }
1612     /* sort the masked values */
1613     PetscSortInt(nmask,masked);
1614 
1615     /* set "j" values into matrix */
1616     maskcount = 1;
1617     for ( j=0; j<nmask; j++ ) {
1618       a->j[jcount++]  = masked[j];
1619       mask[masked[j]] = maskcount++;
1620     }
1621     /* set "a" values into matrix */
1622     ishift = bs2*a->i[i];
1623     for ( j=0; j<bs; j++ ) {
1624       kmax = rowlengths[i*bs+j];
1625       for ( k=0; k<kmax; k++ ) {
1626         tmp    = jj[nzcountb]/bs ;
1627         block  = mask[tmp] - 1;
1628         point  = jj[nzcountb] - bs*tmp;
1629         idx    = ishift + bs2*block + j + bs*point;
1630         a->a[idx] = aa[nzcountb++];
1631       }
1632     }
1633     /* zero out the mask elements we set */
1634     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1635   }
1636   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix");
1637 
1638   PetscFree(rowlengths);
1639   PetscFree(browlengths);
1640   PetscFree(aa);
1641   PetscFree(jj);
1642   PetscFree(mask);
1643 
1644   B->assembled = PETSC_TRUE;
1645 
1646   ierr = MatView_Private(B); CHKERRQ(ierr);
1647   PetscFunctionReturn(0);
1648 }
1649 
1650 
1651 
1652