xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 3f1db9ec2fd39765c6c3a00831044586630c4cca)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: baij.c,v 1.150 1998/12/06 16:38:10 balay Exp bsmith $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "sys.h"
10 #include "src/mat/impls/baij/seq/baij.h"
11 #include "src/vec/vecimpl.h"
12 #include "src/inline/spops.h"
13 
14 #define CHUNKSIZE  10
15 
16 #undef __FUNC__
17 #define __FUNC__ "MatMarkDiag_SeqBAIJ"
18 int MatMarkDiag_SeqBAIJ(Mat A)
19 {
20   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
21   int         i,j, *diag, m = a->mbs;
22 
23   PetscFunctionBegin;
24   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
25   PLogObjectMemory(A,(m+1)*sizeof(int));
26   for ( i=0; i<m; i++ ) {
27     diag[i] = a->i[i+1];
28     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
29       if (a->j[j] == i) {
30         diag[i] = j;
31         break;
32       }
33     }
34   }
35   a->diag = diag;
36   PetscFunctionReturn(0);
37 }
38 
39 
40 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
41 
42 #undef __FUNC__
43 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
44 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
45                             PetscTruth *done)
46 {
47   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
48   int         ierr, n = a->mbs,i;
49 
50   PetscFunctionBegin;
51   *nn = n;
52   if (!ia) PetscFunctionReturn(0);
53   if (symmetric) {
54     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
55   } else if (oshift == 1) {
56     /* temporarily add 1 to i and j indices */
57     int nz = a->i[n] + 1;
58     for ( i=0; i<nz; i++ ) a->j[i]++;
59     for ( i=0; i<n+1; i++ ) a->i[i]++;
60     *ia = a->i; *ja = a->j;
61   } else {
62     *ia = a->i; *ja = a->j;
63   }
64 
65   PetscFunctionReturn(0);
66 }
67 
68 #undef __FUNC__
69 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
70 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
71                                 PetscTruth *done)
72 {
73   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
74   int         i,n = a->mbs;
75 
76   PetscFunctionBegin;
77   if (!ia) PetscFunctionReturn(0);
78   if (symmetric) {
79     PetscFree(*ia);
80     PetscFree(*ja);
81   } else if (oshift == 1) {
82     int nz = a->i[n];
83     for ( i=0; i<nz; i++ ) a->j[i]--;
84     for ( i=0; i<n+1; i++ ) a->i[i]--;
85   }
86   PetscFunctionReturn(0);
87 }
88 
89 #undef __FUNC__
90 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
91 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
92 {
93   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
94 
95   PetscFunctionBegin;
96   *bs = baij->bs;
97   PetscFunctionReturn(0);
98 }
99 
100 
101 #undef __FUNC__
102 #define __FUNC__ "MatDestroy_SeqBAIJ"
103 int MatDestroy_SeqBAIJ(Mat A)
104 {
105   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
106   int         ierr;
107 
108   if (--A->refct > 0) PetscFunctionReturn(0);
109 
110   if (A->mapping) {
111     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
112   }
113   if (A->bmapping) {
114     ierr = ISLocalToGlobalMappingDestroy(A->bmapping); CHKERRQ(ierr);
115   }
116   if (A->rmap) {
117     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
118   }
119   if (A->cmap) {
120     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
121   }
122 #if defined(USE_PETSC_LOG)
123   PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
124 #endif
125   PetscFree(a->a);
126   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
127   if (a->diag) PetscFree(a->diag);
128   if (a->ilen) PetscFree(a->ilen);
129   if (a->imax) PetscFree(a->imax);
130   if (a->solve_work) PetscFree(a->solve_work);
131   if (a->mult_work) PetscFree(a->mult_work);
132   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
133   PetscFree(a);
134   PLogObjectDestroy(A);
135   PetscHeaderDestroy(A);
136   PetscFunctionReturn(0);
137 }
138 
139 #undef __FUNC__
140 #define __FUNC__ "MatSetOption_SeqBAIJ"
141 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
142 {
143   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
144 
145   PetscFunctionBegin;
146   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
147   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
148   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
149   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
150   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
151   else if (op == MAT_NEW_NONZERO_LOCATION_ERROR)   a->nonew       = -1;
152   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew       = -2;
153   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
154   else if (op == MAT_ROWS_SORTED ||
155            op == MAT_ROWS_UNSORTED ||
156            op == MAT_SYMMETRIC ||
157            op == MAT_STRUCTURALLY_SYMMETRIC ||
158            op == MAT_YES_NEW_DIAGONALS ||
159            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
160            op == MAT_USE_HASH_TABLE) {
161     PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
162   } else if (op == MAT_NO_NEW_DIAGONALS) {
163     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
164   } else {
165     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 
171 #undef __FUNC__
172 #define __FUNC__ "MatGetSize_SeqBAIJ"
173 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
174 {
175   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
176 
177   PetscFunctionBegin;
178   if (m) *m = a->m;
179   if (n) *n = a->n;
180   PetscFunctionReturn(0);
181 }
182 
183 #undef __FUNC__
184 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
185 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
186 {
187   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
188 
189   PetscFunctionBegin;
190   *m = 0; *n = a->m;
191   PetscFunctionReturn(0);
192 }
193 
194 #undef __FUNC__
195 #define __FUNC__ "MatGetRow_SeqBAIJ"
196 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
197 {
198   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ *) A->data;
199   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
200   MatScalar    *aa,*aa_i;
201   Scalar       *v_i;
202 
203   PetscFunctionBegin;
204   bs  = a->bs;
205   ai  = a->i;
206   aj  = a->j;
207   aa  = a->a;
208   bs2 = a->bs2;
209 
210   if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range");
211 
212   bn  = row/bs;   /* Block number */
213   bp  = row % bs; /* Block Position */
214   M   = ai[bn+1] - ai[bn];
215   *nz = bs*M;
216 
217   if (v) {
218     *v = 0;
219     if (*nz) {
220       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
221       for ( i=0; i<M; i++ ) { /* for each block in the block row */
222         v_i  = *v + i*bs;
223         aa_i = aa + bs2*(ai[bn] + i);
224         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
225       }
226     }
227   }
228 
229   if (idx) {
230     *idx = 0;
231     if (*nz) {
232       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
233       for ( i=0; i<M; i++ ) { /* for each block in the block row */
234         idx_i = *idx + i*bs;
235         itmp  = bs*aj[ai[bn] + i];
236         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
237       }
238     }
239   }
240   PetscFunctionReturn(0);
241 }
242 
243 #undef __FUNC__
244 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
245 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
246 {
247   PetscFunctionBegin;
248   if (idx) {if (*idx) PetscFree(*idx);}
249   if (v)   {if (*v)   PetscFree(*v);}
250   PetscFunctionReturn(0);
251 }
252 
253 #undef __FUNC__
254 #define __FUNC__ "MatTranspose_SeqBAIJ"
255 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
256 {
257   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
258   Mat         C;
259   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
260   int         *rows,*cols,bs2=a->bs2;
261   MatScalar   *array = a->a;
262 
263   PetscFunctionBegin;
264   if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place");
265   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
266   PetscMemzero(col,(1+nbs)*sizeof(int));
267 
268   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
269   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
270   PetscFree(col);
271   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
272   cols = rows + bs;
273   for ( i=0; i<mbs; i++ ) {
274     cols[0] = i*bs;
275     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
276     len = ai[i+1] - ai[i];
277     for ( j=0; j<len; j++ ) {
278       rows[0] = (*aj++)*bs;
279       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
280       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
281       array += bs2;
282     }
283   }
284   PetscFree(rows);
285 
286   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
287   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
288 
289   if (B != PETSC_NULL) {
290     *B = C;
291   } else {
292     PetscOps *Abops;
293     MatOps   Aops;
294 
295     /* This isn't really an in-place transpose */
296     PetscFree(a->a);
297     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
298     if (a->diag) PetscFree(a->diag);
299     if (a->ilen) PetscFree(a->ilen);
300     if (a->imax) PetscFree(a->imax);
301     if (a->solve_work) PetscFree(a->solve_work);
302     PetscFree(a);
303 
304 
305     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
306     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
307 
308     /*
309        This is horrible, horrible code. We need to keep the
310       A pointers for the bops and ops but copy everything
311       else from C.
312     */
313     Abops    = A->bops;
314     Aops     = A->ops;
315     PetscMemcpy(A,C,sizeof(struct _p_Mat));
316     A->bops  = Abops;
317     A->ops   = Aops;
318     A->qlist = 0;
319 
320     PetscHeaderDestroy(C);
321   }
322   PetscFunctionReturn(0);
323 }
324 
325 
326 
327 
328 #undef __FUNC__
329 #define __FUNC__ "MatView_SeqBAIJ_Binary"
330 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
331 {
332   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
333   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
334   Scalar      *aa;
335 
336   PetscFunctionBegin;
337   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
338   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
339   col_lens[0] = MAT_COOKIE;
340 
341   col_lens[1] = a->m;
342   col_lens[2] = a->n;
343   col_lens[3] = a->nz*bs2;
344 
345   /* store lengths of each row and write (including header) to file */
346   count = 0;
347   for ( i=0; i<a->mbs; i++ ) {
348     for ( j=0; j<bs; j++ ) {
349       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
350     }
351   }
352   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1); CHKERRQ(ierr);
353   PetscFree(col_lens);
354 
355   /* store column indices (zero start index) */
356   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj);
357   count = 0;
358   for ( i=0; i<a->mbs; i++ ) {
359     for ( j=0; j<bs; j++ ) {
360       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
361         for ( l=0; l<bs; l++ ) {
362           jj[count++] = bs*a->j[k] + l;
363         }
364       }
365     }
366   }
367   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0); CHKERRQ(ierr);
368   PetscFree(jj);
369 
370   /* store nonzero values */
371   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa);
372   count = 0;
373   for ( i=0; i<a->mbs; i++ ) {
374     for ( j=0; j<bs; j++ ) {
375       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
376         for ( l=0; l<bs; l++ ) {
377           aa[count++] = a->a[bs2*k + l*bs + j];
378         }
379       }
380     }
381   }
382   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0); CHKERRQ(ierr);
383   PetscFree(aa);
384   PetscFunctionReturn(0);
385 }
386 
387 #undef __FUNC__
388 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
389 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
390 {
391   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
392   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
393   FILE        *fd;
394   char        *outputname;
395 
396   PetscFunctionBegin;
397   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
398   ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr);
399   ierr = ViewerGetFormat(viewer,&format);
400   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
401     fprintf(fd,"  block size is %d\n",bs);
402   }
403   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
404     SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported");
405   }
406   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
407     for ( i=0; i<a->mbs; i++ ) {
408       for ( j=0; j<bs; j++ ) {
409         fprintf(fd,"row %d:",i*bs+j);
410         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
411           for ( l=0; l<bs; l++ ) {
412 #if defined(USE_PETSC_COMPLEX)
413           if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0)
414             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
415               PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));
416           else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0)
417             fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
418               PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));
419           else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0)
420             fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));
421 #else
422           if (a->a[bs2*k + l*bs + j] != 0.0)
423             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
424 #endif
425           }
426         }
427         fprintf(fd,"\n");
428       }
429     }
430   }
431   else {
432     for ( i=0; i<a->mbs; i++ ) {
433       for ( j=0; j<bs; j++ ) {
434         fprintf(fd,"row %d:",i*bs+j);
435         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
436           for ( l=0; l<bs; l++ ) {
437 #if defined(USE_PETSC_COMPLEX)
438           if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) {
439             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
440               PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));
441           }
442           else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) {
443             fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
444               PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));
445           }
446           else {
447             fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));
448           }
449 #else
450             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
451 #endif
452           }
453         }
454         fprintf(fd,"\n");
455       }
456     }
457   }
458   fflush(fd);
459   PetscFunctionReturn(0);
460 }
461 
462 #undef __FUNC__
463 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom"
464 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa)
465 {
466   Mat          A = (Mat) Aa;
467   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
468   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
469   double       xl,yl,xr,yr,x_l,x_r,y_l,y_r;
470   MatScalar    *aa;
471   MPI_Comm     comm;
472   Viewer       viewer;
473 
474   PetscFunctionBegin;
475   /*
476       This is nasty. If this is called from an originally parallel matrix
477    then all processes call this, but only the first has the matrix so the
478    rest should return immediately.
479   */
480   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
481   MPI_Comm_rank(comm,&rank);
482   if (rank) PetscFunctionReturn(0);
483 
484   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr);
485 
486   ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr); CHKERRQ(ierr);
487 
488   /* loop over matrix elements drawing boxes */
489   color = DRAW_BLUE;
490   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
491     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
492       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
493       x_l = a->j[j]*bs; x_r = x_l + 1.0;
494       aa = a->a + j*bs2;
495       for ( k=0; k<bs; k++ ) {
496         for ( l=0; l<bs; l++ ) {
497           if (PetscReal(*aa++) >=  0.) continue;
498           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
499         }
500       }
501     }
502   }
503   color = DRAW_CYAN;
504   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
505     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
506       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
507       x_l = a->j[j]*bs; x_r = x_l + 1.0;
508       aa = a->a + j*bs2;
509       for ( k=0; k<bs; k++ ) {
510         for ( l=0; l<bs; l++ ) {
511           if (PetscReal(*aa++) != 0.) continue;
512           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
513         }
514       }
515     }
516   }
517 
518   color = DRAW_RED;
519   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
520     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
521       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
522       x_l = a->j[j]*bs; x_r = x_l + 1.0;
523       aa = a->a + j*bs2;
524       for ( k=0; k<bs; k++ ) {
525         for ( l=0; l<bs; l++ ) {
526           if (PetscReal(*aa++) <= 0.) continue;
527           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
528         }
529       }
530     }
531   }
532   PetscFunctionReturn(0);
533 }
534 
535 #undef __FUNC__
536 #define __FUNC__ "MatView_SeqBAIJ_Draw"
537 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
538 {
539   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
540   int          ierr;
541   double       xl,yl,xr,yr,w,h;
542   Draw         draw;
543   PetscTruth   isnull;
544 
545   PetscFunctionBegin;
546 
547   ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
548   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
549 
550   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
551   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
552   xr += w;    yr += h;  xl = -w;     yl = -h;
553   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
554   ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A); CHKERRQ(ierr);
555   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNC__
560 #define __FUNC__ "MatView_SeqBAIJ"
561 int MatView_SeqBAIJ(Mat A,Viewer viewer)
562 {
563   ViewerType  vtype;
564   int         ierr;
565 
566   PetscFunctionBegin;
567   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
568   if (PetscTypeCompare(vtype,MATLAB_VIEWER)) {
569     SETERRQ(PETSC_ERR_SUP,0,"Matlab viewer not supported");
570   } else if (PetscTypeCompare(vtype,ASCII_VIEWER)){
571     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
572   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
573     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
574   } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
575     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
576   } else {
577     SETERRQ(1,1,"Viewer type not supported by PETSc object");
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 
583 #undef __FUNC__
584 #define __FUNC__ "MatGetValues_SeqBAIJ"
585 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
586 {
587   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
588   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
589   int        *ai = a->i, *ailen = a->ilen;
590   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
591   MatScalar  *ap, *aa = a->a, zero = 0.0;
592 
593   PetscFunctionBegin;
594   for ( k=0; k<m; k++ ) { /* loop over rows */
595     row  = im[k]; brow = row/bs;
596     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
597     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
598     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
599     nrow = ailen[brow];
600     for ( l=0; l<n; l++ ) { /* loop over columns */
601       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
602       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
603       col  = in[l] ;
604       bcol = col/bs;
605       cidx = col%bs;
606       ridx = row%bs;
607       high = nrow;
608       low  = 0; /* assume unsorted */
609       while (high-low > 5) {
610         t = (low+high)/2;
611         if (rp[t] > bcol) high = t;
612         else             low  = t;
613       }
614       for ( i=low; i<high; i++ ) {
615         if (rp[i] > bcol) break;
616         if (rp[i] == bcol) {
617           *v++ = ap[bs2*i+bs*cidx+ridx];
618           goto finished;
619         }
620       }
621       *v++ = zero;
622       finished:;
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 
629 #undef __FUNC__
630 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
631 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
632 {
633   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
634   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
635   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented;
636   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval;
637   Scalar      *value = v;
638   MatScalar   *ap,*aa=a->a,*bap;
639 
640   PetscFunctionBegin;
641   if (roworiented) {
642     stepval = (n-1)*bs;
643   } else {
644     stepval = (m-1)*bs;
645   }
646   for ( k=0; k<m; k++ ) { /* loop over added rows */
647     row  = im[k];
648 #if defined(USE_PETSC_BOPT_g)
649     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
650     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
651 #endif
652     rp   = aj + ai[row];
653     ap   = aa + bs2*ai[row];
654     rmax = imax[row];
655     nrow = ailen[row];
656     low  = 0;
657     for ( l=0; l<n; l++ ) { /* loop over added columns */
658 #if defined(USE_PETSC_BOPT_g)
659       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
660       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
661 #endif
662       col = in[l];
663       if (roworiented) {
664         value = v + k*(stepval+bs)*bs + l*bs;
665       } else {
666         value = v + l*(stepval+bs)*bs + k*bs;
667       }
668       if (!sorted) low = 0; high = nrow;
669       while (high-low > 7) {
670         t = (low+high)/2;
671         if (rp[t] > col) high = t;
672         else             low  = t;
673       }
674       for ( i=low; i<high; i++ ) {
675         if (rp[i] > col) break;
676         if (rp[i] == col) {
677           bap  = ap +  bs2*i;
678           if (roworiented) {
679             if (is == ADD_VALUES) {
680               for ( ii=0; ii<bs; ii++,value+=stepval ) {
681                 for (jj=ii; jj<bs2; jj+=bs ) {
682                   bap[jj] += *value++;
683                 }
684               }
685             } else {
686               for ( ii=0; ii<bs; ii++,value+=stepval ) {
687                 for (jj=ii; jj<bs2; jj+=bs ) {
688                   bap[jj] = *value++;
689                 }
690               }
691             }
692           } else {
693             if (is == ADD_VALUES) {
694               for ( ii=0; ii<bs; ii++,value+=stepval ) {
695                 for (jj=0; jj<bs; jj++ ) {
696                   *bap++ += *value++;
697                 }
698               }
699             } else {
700               for ( ii=0; ii<bs; ii++,value+=stepval ) {
701                 for (jj=0; jj<bs; jj++ ) {
702                   *bap++  = *value++;
703                 }
704               }
705             }
706           }
707           goto noinsert2;
708         }
709       }
710       if (nonew == 1) goto noinsert2;
711       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
712       if (nrow >= rmax) {
713         /* there is no extra room in row, therefore enlarge */
714         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
715         MatScalar *new_a;
716 
717         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
718 
719         /* malloc new storage space */
720         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
721         new_a   = (MatScalar *) PetscMalloc( len ); CHKPTRQ(new_a);
722         new_j   = (int *) (new_a + bs2*new_nz);
723         new_i   = new_j + new_nz;
724 
725         /* copy over old data into new slots */
726         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
727         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
728         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
729         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
730         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
731         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
732         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
733         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
734         /* free up old matrix storage */
735         PetscFree(a->a);
736         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
737         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
738         a->singlemalloc = 1;
739 
740         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
741         rmax = imax[row] = imax[row] + CHUNKSIZE;
742         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
743         a->maxnz += bs2*CHUNKSIZE;
744         a->reallocs++;
745         a->nz++;
746       }
747       N = nrow++ - 1;
748       /* shift up all the later entries in this row */
749       for ( ii=N; ii>=i; ii-- ) {
750         rp[ii+1] = rp[ii];
751         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
752       }
753       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
754       rp[i] = col;
755       bap   = ap +  bs2*i;
756       if (roworiented) {
757         for ( ii=0; ii<bs; ii++,value+=stepval) {
758           for (jj=ii; jj<bs2; jj+=bs ) {
759             bap[jj] = *value++;
760           }
761         }
762       } else {
763         for ( ii=0; ii<bs; ii++,value+=stepval ) {
764           for (jj=0; jj<bs; jj++ ) {
765             *bap++  = *value++;
766           }
767         }
768       }
769       noinsert2:;
770       low = i;
771     }
772     ailen[row] = nrow;
773   }
774   PetscFunctionReturn(0);
775 }
776 
777 
778 #undef __FUNC__
779 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
780 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
781 {
782   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
783   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
784   int        m = a->m,*ip, N, *ailen = a->ilen;
785   int        mbs = a->mbs, bs2 = a->bs2,rmax = 0;
786   MatScalar  *aa = a->a, *ap;
787 
788   PetscFunctionBegin;
789   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
790 
791   if (m) rmax = ailen[0];
792   for ( i=1; i<mbs; i++ ) {
793     /* move each row back by the amount of empty slots (fshift) before it*/
794     fshift += imax[i-1] - ailen[i-1];
795     rmax   = PetscMax(rmax,ailen[i]);
796     if (fshift) {
797       ip = aj + ai[i]; ap = aa + bs2*ai[i];
798       N = ailen[i];
799       for ( j=0; j<N; j++ ) {
800         ip[j-fshift] = ip[j];
801         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
802       }
803     }
804     ai[i] = ai[i-1] + ailen[i-1];
805   }
806   if (mbs) {
807     fshift += imax[mbs-1] - ailen[mbs-1];
808     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
809   }
810   /* reset ilen and imax for each row */
811   for ( i=0; i<mbs; i++ ) {
812     ailen[i] = imax[i] = ai[i+1] - ai[i];
813   }
814   a->nz = ai[mbs];
815 
816   /* diagonals may have moved, so kill the diagonal pointers */
817   if (fshift && a->diag) {
818     PetscFree(a->diag);
819     PLogObjectMemory(A,-(m+1)*sizeof(int));
820     a->diag = 0;
821   }
822   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
823            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
824   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
825            a->reallocs);
826   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
827   a->reallocs          = 0;
828   A->info.nz_unneeded  = (double)fshift*bs2;
829 
830   PetscFunctionReturn(0);
831 }
832 
833 
834 /* idx should be of length atlease bs */
835 #undef __FUNC__
836 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
837 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
838 {
839   int i,row;
840 
841   PetscFunctionBegin;
842   row = idx[0];
843   if (row%bs!=0) { *flg = PETSC_FALSE; PetscFunctionReturn(0); }
844 
845   for ( i=1; i<bs; i++ ) {
846     if (row+i != idx[i]) { *flg = PETSC_FALSE; PetscFunctionReturn(0); }
847   }
848   *flg = PETSC_TRUE;
849   PetscFunctionReturn(0);
850 }
851 
852 #undef __FUNC__
853 #define __FUNC__ "MatZeroRows_SeqBAIJ"
854 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
855 {
856   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
857   IS          is_local;
858   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
859   PetscTruth  flg;
860   Scalar      zero = 0.0;
861   MatScalar   *aa;
862 
863   PetscFunctionBegin;
864   /* Make a copy of the IS and  sort it */
865   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
866   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
867   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
868   ierr = ISSort(is_local); CHKERRQ(ierr);
869   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
870 
871   i = 0;
872   while (i < is_n) {
873     if (rows[i]<0 || rows[i]>m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"row out of range");
874     flg = PETSC_FALSE;
875     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
876     count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
877     aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
878     if (flg) { /* There exists a block of rows to be Zerowed */
879       if (baij->ilen[rows[i]/bs] > 0) {
880         PetscMemzero(aa,count*bs*sizeof(MatScalar));
881         baij->ilen[rows[i]/bs] = 1;
882         baij->j[baij->i[rows[i]/bs]] = rows[i]/bs;
883       }
884       i += bs;
885     } else { /* Zero out only the requested row */
886       for ( j=0; j<count; j++ ) {
887         aa[0] = zero;
888         aa+=bs;
889       }
890       i++;
891     }
892   }
893   if (diag) {
894     for ( j=0; j<is_n; j++ ) {
895       ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
896     }
897   }
898   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
899   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
900   ierr = ISDestroy(is_local); CHKERRQ(ierr);
901   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
902   PetscFunctionReturn(0);
903 }
904 
905 #undef __FUNC__
906 #define __FUNC__ "MatSetValues_SeqBAIJ"
907 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
908 {
909   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
910   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
911   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
912   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
913   int         ridx,cidx,bs2=a->bs2;
914   MatScalar   *ap,value,*aa=a->a,*bap;
915 
916   PetscFunctionBegin;
917   for ( k=0; k<m; k++ ) { /* loop over added rows */
918     row  = im[k]; brow = row/bs;
919 #if defined(USE_PETSC_BOPT_g)
920     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
921     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
922 #endif
923     rp   = aj + ai[brow];
924     ap   = aa + bs2*ai[brow];
925     rmax = imax[brow];
926     nrow = ailen[brow];
927     low  = 0;
928     for ( l=0; l<n; l++ ) { /* loop over added columns */
929 #if defined(USE_PETSC_BOPT_g)
930       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
931       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
932 #endif
933       col = in[l]; bcol = col/bs;
934       ridx = row % bs; cidx = col % bs;
935       if (roworiented) {
936         value = *v++;
937       } else {
938         value = v[k + l*m];
939       }
940       if (!sorted) low = 0; high = nrow;
941       while (high-low > 7) {
942         t = (low+high)/2;
943         if (rp[t] > bcol) high = t;
944         else              low  = t;
945       }
946       for ( i=low; i<high; i++ ) {
947         if (rp[i] > bcol) break;
948         if (rp[i] == bcol) {
949           bap  = ap +  bs2*i + bs*cidx + ridx;
950           if (is == ADD_VALUES) *bap += value;
951           else                  *bap  = value;
952           goto noinsert1;
953         }
954       }
955       if (nonew == 1) goto noinsert1;
956       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
957       if (nrow >= rmax) {
958         /* there is no extra room in row, therefore enlarge */
959         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
960         MatScalar *new_a;
961 
962         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
963 
964         /* Malloc new storage space */
965         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
966         new_a   = (MatScalar *) PetscMalloc( len ); CHKPTRQ(new_a);
967         new_j   = (int *) (new_a + bs2*new_nz);
968         new_i   = new_j + new_nz;
969 
970         /* copy over old data into new slots */
971         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
972         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
973         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
974         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
975         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
976                                                            len*sizeof(int));
977         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
978         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
979         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
980                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
981         /* free up old matrix storage */
982         PetscFree(a->a);
983         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
984         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
985         a->singlemalloc = 1;
986 
987         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
988         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
989         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
990         a->maxnz += bs2*CHUNKSIZE;
991         a->reallocs++;
992         a->nz++;
993       }
994       N = nrow++ - 1;
995       /* shift up all the later entries in this row */
996       for ( ii=N; ii>=i; ii-- ) {
997         rp[ii+1] = rp[ii];
998         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
999       }
1000       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1001       rp[i]                      = bcol;
1002       ap[bs2*i + bs*cidx + ridx] = value;
1003       noinsert1:;
1004       low = i;
1005     }
1006     ailen[brow] = nrow;
1007   }
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1012 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1013 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1014 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatGetSubMatrixCall,Mat*);
1015 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1016 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec);
1017 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1018 extern int MatScale_SeqBAIJ(Scalar*,Mat);
1019 extern int MatNorm_SeqBAIJ(Mat,NormType,double *);
1020 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*);
1021 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1022 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1023 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1024 extern int MatZeroEntries_SeqBAIJ(Mat);
1025 
1026 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1027 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1028 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1029 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1030 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1031 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1032 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1033 
1034 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1035 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1036 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1037 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1038 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1039 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1040 
1041 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1042 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1043 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1044 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1045 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1046 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1047 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1048 
1049 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1050 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1051 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1052 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1053 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1054 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1055 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1056 
1057 #undef __FUNC__
1058 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1059 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1060 {
1061   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1062   Mat         outA;
1063   int         ierr;
1064   PetscTruth  row_identity, col_identity;
1065 
1066   PetscFunctionBegin;
1067   if (fill != 0) SETERRQ(PETSC_ERR_SUP,0,"Only fill=0 supported");
1068   ierr = ISIdentity(row,&row_identity); CHKERRQ(ierr);
1069   ierr = ISIdentity(col,&col_identity); CHKERRQ(ierr);
1070   if (!row_identity || !col_identity) {
1071     SETERRQ(1,1,"Row and column permutations must be identity");
1072   }
1073 
1074   outA          = inA;
1075   inA->factor   = FACTOR_LU;
1076   a->row        = row;
1077   a->col        = col;
1078 
1079   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1080   ierr = ISInvertPermutation(col,&(a->icol)); CHKERRQ(ierr);
1081   PLogObjectParent(inA,a->icol);
1082 
1083   if (!a->solve_work) {
1084     a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1085     PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1086   }
1087 
1088   if (!a->diag) {
1089     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1090   }
1091   /*
1092       Blocksize 4 and 5 have a special faster factorization/solver for ILU(0) factorization
1093     with natural ordering
1094   */
1095   if (a->bs == 4) {
1096     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1097     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1098     PLogInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");
1099   } else if (a->bs == 5) {
1100     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1101     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1102     PLogInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");
1103   }
1104 
1105   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1106 
1107 
1108   PetscFunctionReturn(0);
1109 }
1110 #undef __FUNC__
1111 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1112 int MatPrintHelp_SeqBAIJ(Mat A)
1113 {
1114   static int called = 0;
1115   MPI_Comm   comm = A->comm;
1116 
1117   PetscFunctionBegin;
1118   if (called) {PetscFunctionReturn(0);} else called = 1;
1119   (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1120   (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 EXTERN_C_BEGIN
1125 #undef __FUNC__
1126 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1127 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1128 {
1129   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1130   int         i,nz,n;
1131 
1132   PetscFunctionBegin;
1133   nz = baij->maxnz;
1134   n  = baij->n;
1135   for (i=0; i<nz; i++) {
1136     baij->j[i] = indices[i];
1137   }
1138   baij->nz = nz;
1139   for ( i=0; i<n; i++ ) {
1140     baij->ilen[i] = baij->imax[i];
1141   }
1142 
1143   PetscFunctionReturn(0);
1144 }
1145 EXTERN_C_END
1146 
1147 #undef __FUNC__
1148 #define __FUNC__ "MatSeqBAIJSetColumnIndices"
1149 /*@
1150     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1151        in the matrix.
1152 
1153   Input Parameters:
1154 +  mat - the SeqBAIJ matrix
1155 -  indices - the column indices
1156 
1157   Notes:
1158     This can be called if you have precomputed the nonzero structure of the
1159   matrix and want to provide it to the matrix object to improve the performance
1160   of the MatSetValues() operation.
1161 
1162     You MUST have set the correct numbers of nonzeros per row in the call to
1163   MatCreateSeqBAIJ().
1164 
1165     MUST be called before any calls to MatSetValues();
1166 
1167 @*/
1168 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1169 {
1170   int ierr,(*f)(Mat,int *);
1171 
1172   PetscFunctionBegin;
1173   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1174   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);
1175          CHKERRQ(ierr);
1176   if (f) {
1177     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1178   } else {
1179     SETERRQ(1,1,"Wrong type of matrix to set column indices");
1180   }
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 /* -------------------------------------------------------------------*/
1185 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1186        MatGetRow_SeqBAIJ,
1187        MatRestoreRow_SeqBAIJ,
1188        MatMult_SeqBAIJ_N,
1189        MatMultAdd_SeqBAIJ_N,
1190        MatMultTrans_SeqBAIJ,
1191        MatMultTransAdd_SeqBAIJ,
1192        MatSolve_SeqBAIJ_N,
1193        0,
1194        0,
1195        0,
1196        MatLUFactor_SeqBAIJ,
1197        0,
1198        0,
1199        MatTranspose_SeqBAIJ,
1200        MatGetInfo_SeqBAIJ,
1201        MatEqual_SeqBAIJ,
1202        MatGetDiagonal_SeqBAIJ,
1203        MatDiagonalScale_SeqBAIJ,
1204        MatNorm_SeqBAIJ,
1205        0,
1206        MatAssemblyEnd_SeqBAIJ,
1207        0,
1208        MatSetOption_SeqBAIJ,
1209        MatZeroEntries_SeqBAIJ,
1210        MatZeroRows_SeqBAIJ,
1211        MatLUFactorSymbolic_SeqBAIJ,
1212        MatLUFactorNumeric_SeqBAIJ_N,
1213        0,
1214        0,
1215        MatGetSize_SeqBAIJ,
1216        MatGetSize_SeqBAIJ,
1217        MatGetOwnershipRange_SeqBAIJ,
1218        MatILUFactorSymbolic_SeqBAIJ,
1219        0,
1220        0,
1221        0,
1222        MatDuplicate_SeqBAIJ,
1223        0,
1224        0,
1225        MatILUFactor_SeqBAIJ,
1226        0,
1227        0,
1228        MatGetSubMatrices_SeqBAIJ,
1229        MatIncreaseOverlap_SeqBAIJ,
1230        MatGetValues_SeqBAIJ,
1231        0,
1232        MatPrintHelp_SeqBAIJ,
1233        MatScale_SeqBAIJ,
1234        0,
1235        0,
1236        0,
1237        MatGetBlockSize_SeqBAIJ,
1238        MatGetRowIJ_SeqBAIJ,
1239        MatRestoreRowIJ_SeqBAIJ,
1240        0,
1241        0,
1242        0,
1243        0,
1244        0,
1245        0,
1246        MatSetValuesBlocked_SeqBAIJ,
1247        MatGetSubMatrix_SeqBAIJ,
1248        0,
1249        0,
1250        MatGetMaps_Petsc};
1251 
1252 #undef __FUNC__
1253 #define __FUNC__ "MatCreateSeqBAIJ"
1254 /*@C
1255    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1256    compressed row) format.  For good matrix assembly performance the
1257    user should preallocate the matrix storage by setting the parameter nz
1258    (or the array nzz).  By setting these parameters accurately, performance
1259    during matrix assembly can be increased by more than a factor of 50.
1260 
1261    Collective on MPI_Comm
1262 
1263    Input Parameters:
1264 +  comm - MPI communicator, set to PETSC_COMM_SELF
1265 .  bs - size of block
1266 .  m - number of rows
1267 .  n - number of columns
1268 .  nz - number of block nonzeros per block row (same for all rows)
1269 -  nzz - array containing the number of block nonzeros in the various block rows
1270          (possibly different for each block row) or PETSC_NULL
1271 
1272    Output Parameter:
1273 .  A - the matrix
1274 
1275    Options Database Keys:
1276 .   -mat_no_unroll - uses code that does not unroll the loops in the
1277                      block calculations (much slower)
1278 .    -mat_block_size - size of the blocks to use
1279 
1280    Notes:
1281    The block AIJ format is fully compatible with standard Fortran 77
1282    storage.  That is, the stored row and column indices can begin at
1283    either one (as in Fortran) or zero.  See the users' manual for details.
1284 
1285    Specify the preallocated storage with either nz or nnz (not both).
1286    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1287    allocation.  For additional details, see the users manual chapter on
1288    matrices.
1289 
1290 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1291 @*/
1292 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1293 {
1294   Mat         B;
1295   Mat_SeqBAIJ *b;
1296   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
1297 
1298   PetscFunctionBegin;
1299   MPI_Comm_size(comm,&size);
1300   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1");
1301 
1302   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1303 
1304   if (mbs*bs!=m || nbs*bs!=n) {
1305     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize");
1306   }
1307 
1308   *A = 0;
1309   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView);
1310   PLogObjectCreate(B);
1311   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1312   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1313   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1314   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1315   if (!flg) {
1316     switch (bs) {
1317     case 1:
1318       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1319       B->ops->solve           = MatSolve_SeqBAIJ_1;
1320       B->ops->mult            = MatMult_SeqBAIJ_1;
1321       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1322       break;
1323     case 2:
1324       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1325       B->ops->solve           = MatSolve_SeqBAIJ_2;
1326       B->ops->mult            = MatMult_SeqBAIJ_2;
1327       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1328       break;
1329     case 3:
1330       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1331       B->ops->solve           = MatSolve_SeqBAIJ_3;
1332       B->ops->mult            = MatMult_SeqBAIJ_3;
1333       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1334       break;
1335     case 4:
1336       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1337       B->ops->solve           = MatSolve_SeqBAIJ_4;
1338       B->ops->mult            = MatMult_SeqBAIJ_4;
1339       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1340       break;
1341     case 5:
1342       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1343       B->ops->solve           = MatSolve_SeqBAIJ_5;
1344       B->ops->mult            = MatMult_SeqBAIJ_5;
1345       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1346       break;
1347     case 7:
1348       B->ops->mult            = MatMult_SeqBAIJ_7;
1349       B->ops->solve           = MatSolve_SeqBAIJ_7;
1350       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1351       break;
1352     }
1353   }
1354   B->ops->destroy     = MatDestroy_SeqBAIJ;
1355   B->ops->view        = MatView_SeqBAIJ;
1356   B->factor           = 0;
1357   B->lupivotthreshold = 1.0;
1358   B->mapping          = 0;
1359   b->row              = 0;
1360   b->col              = 0;
1361   b->icol             = 0;
1362   b->reallocs         = 0;
1363 
1364   b->m       = m; B->m = m; B->M = m;
1365   b->n       = n; B->n = n; B->N = n;
1366 
1367   ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr);
1368   ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr);
1369 
1370   b->mbs     = mbs;
1371   b->nbs     = nbs;
1372   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1373   if (nnz == PETSC_NULL) {
1374     if (nz == PETSC_DEFAULT) nz = 5;
1375     else if (nz <= 0)        nz = 1;
1376     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1377     nz = nz*mbs;
1378   } else {
1379     nz = 0;
1380     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1381   }
1382 
1383   /* allocate the matrix space */
1384   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int);
1385   b->a  = (MatScalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1386   PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
1387   b->j  = (int *) (b->a + nz*bs2);
1388   PetscMemzero(b->j,nz*sizeof(int));
1389   b->i  = b->j + nz;
1390   b->singlemalloc = 1;
1391 
1392   b->i[0] = 0;
1393   for (i=1; i<mbs+1; i++) {
1394     b->i[i] = b->i[i-1] + b->imax[i-1];
1395   }
1396 
1397   /* b->ilen will count nonzeros in each block row so far. */
1398   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1399   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1400   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1401 
1402   b->bs               = bs;
1403   b->bs2              = bs2;
1404   b->mbs              = mbs;
1405   b->nz               = 0;
1406   b->maxnz            = nz*bs2;
1407   b->sorted           = 0;
1408   b->roworiented      = 1;
1409   b->nonew            = 0;
1410   b->diag             = 0;
1411   b->solve_work       = 0;
1412   b->mult_work        = 0;
1413   b->spptr            = 0;
1414   B->info.nz_unneeded = (double)b->maxnz;
1415 
1416   *A = B;
1417   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1418   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1419 
1420   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1421                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1422                                      (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1423 
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 #undef __FUNC__
1428 #define __FUNC__ "MatDuplicate_SeqBAIJ"
1429 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1430 {
1431   Mat         C;
1432   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1433   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1434 
1435   PetscFunctionBegin;
1436   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix");
1437 
1438   *B = 0;
1439   PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView);
1440   PLogObjectCreate(C);
1441   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1442   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
1443   C->ops->destroy    = MatDestroy_SeqBAIJ;
1444   C->ops->view       = MatView_SeqBAIJ;
1445   C->factor          = A->factor;
1446   c->row             = 0;
1447   c->col             = 0;
1448   C->assembled       = PETSC_TRUE;
1449 
1450   c->m = C->m   = a->m;
1451   c->n = C->n   = a->n;
1452   C->M          = a->m;
1453   C->N          = a->n;
1454 
1455   c->bs         = a->bs;
1456   c->bs2        = a->bs2;
1457   c->mbs        = a->mbs;
1458   c->nbs        = a->nbs;
1459 
1460   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1461   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1462   for ( i=0; i<mbs; i++ ) {
1463     c->imax[i] = a->imax[i];
1464     c->ilen[i] = a->ilen[i];
1465   }
1466 
1467   /* allocate the matrix space */
1468   c->singlemalloc = 1;
1469   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1470   c->a  = (MatScalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1471   c->j  = (int *) (c->a + nz*bs2);
1472   c->i  = c->j + nz;
1473   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1474   if (mbs > 0) {
1475     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1476     if (cpvalues == MAT_COPY_VALUES) {
1477       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
1478     } else {
1479       PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
1480     }
1481   }
1482 
1483   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1484   c->sorted      = a->sorted;
1485   c->roworiented = a->roworiented;
1486   c->nonew       = a->nonew;
1487 
1488   if (a->diag) {
1489     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1490     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1491     for ( i=0; i<mbs; i++ ) {
1492       c->diag[i] = a->diag[i];
1493     }
1494   }
1495   else c->diag          = 0;
1496   c->nz                 = a->nz;
1497   c->maxnz              = a->maxnz;
1498   c->solve_work         = 0;
1499   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1500   c->mult_work          = 0;
1501   *B = C;
1502   ierr = PetscObjectComposeFunction((PetscObject)C,"MatSeqBAIJSetColumnIndices_C",
1503                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1504                                      (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1505   PetscFunctionReturn(0);
1506 }
1507 
1508 #undef __FUNC__
1509 #define __FUNC__ "MatLoad_SeqBAIJ"
1510 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1511 {
1512   Mat_SeqBAIJ  *a;
1513   Mat          B;
1514   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1515   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1516   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1517   int          *masked, nmask,tmp,bs2,ishift;
1518   Scalar       *aa;
1519   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1520 
1521   PetscFunctionBegin;
1522   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1523   bs2  = bs*bs;
1524 
1525   MPI_Comm_size(comm,&size);
1526   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor");
1527   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1528   ierr = PetscBinaryRead(fd,header,4,PETSC_INT); CHKERRQ(ierr);
1529   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object");
1530   M = header[1]; N = header[2]; nz = header[3];
1531 
1532   if (header[3] < 0) {
1533     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ");
1534   }
1535 
1536   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
1537 
1538   /*
1539      This code adds extra rows to make sure the number of rows is
1540     divisible by the blocksize
1541   */
1542   mbs        = M/bs;
1543   extra_rows = bs - M + bs*(mbs);
1544   if (extra_rows == bs) extra_rows = 0;
1545   else                  mbs++;
1546   if (extra_rows) {
1547     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1548   }
1549 
1550   /* read in row lengths */
1551   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1552   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1553   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1554 
1555   /* read in column indices */
1556   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1557   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT); CHKERRQ(ierr);
1558   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1559 
1560   /* loop over row lengths determining block row lengths */
1561   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1562   PetscMemzero(browlengths,mbs*sizeof(int));
1563   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1564   PetscMemzero(mask,mbs*sizeof(int));
1565   masked = mask + mbs;
1566   rowcount = 0; nzcount = 0;
1567   for ( i=0; i<mbs; i++ ) {
1568     nmask = 0;
1569     for ( j=0; j<bs; j++ ) {
1570       kmax = rowlengths[rowcount];
1571       for ( k=0; k<kmax; k++ ) {
1572         tmp = jj[nzcount++]/bs;
1573         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1574       }
1575       rowcount++;
1576     }
1577     browlengths[i] += nmask;
1578     /* zero out the mask elements we set */
1579     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1580   }
1581 
1582   /* create our matrix */
1583   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1584   B = *A;
1585   a = (Mat_SeqBAIJ *) B->data;
1586 
1587   /* set matrix "i" values */
1588   a->i[0] = 0;
1589   for ( i=1; i<= mbs; i++ ) {
1590     a->i[i]      = a->i[i-1] + browlengths[i-1];
1591     a->ilen[i-1] = browlengths[i-1];
1592   }
1593   a->nz         = 0;
1594   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1595 
1596   /* read in nonzero values */
1597   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1598   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR); CHKERRQ(ierr);
1599   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1600 
1601   /* set "a" and "j" values into matrix */
1602   nzcount = 0; jcount = 0;
1603   for ( i=0; i<mbs; i++ ) {
1604     nzcountb = nzcount;
1605     nmask    = 0;
1606     for ( j=0; j<bs; j++ ) {
1607       kmax = rowlengths[i*bs+j];
1608       for ( k=0; k<kmax; k++ ) {
1609         tmp = jj[nzcount++]/bs;
1610 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1611       }
1612       rowcount++;
1613     }
1614     /* sort the masked values */
1615     PetscSortInt(nmask,masked);
1616 
1617     /* set "j" values into matrix */
1618     maskcount = 1;
1619     for ( j=0; j<nmask; j++ ) {
1620       a->j[jcount++]  = masked[j];
1621       mask[masked[j]] = maskcount++;
1622     }
1623     /* set "a" values into matrix */
1624     ishift = bs2*a->i[i];
1625     for ( j=0; j<bs; j++ ) {
1626       kmax = rowlengths[i*bs+j];
1627       for ( k=0; k<kmax; k++ ) {
1628         tmp       = jj[nzcountb]/bs ;
1629         block     = mask[tmp] - 1;
1630         point     = jj[nzcountb] - bs*tmp;
1631         idx       = ishift + bs2*block + j + bs*point;
1632         a->a[idx] = aa[nzcountb++];
1633       }
1634     }
1635     /* zero out the mask elements we set */
1636     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1637   }
1638   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix");
1639 
1640   PetscFree(rowlengths);
1641   PetscFree(browlengths);
1642   PetscFree(aa);
1643   PetscFree(jj);
1644   PetscFree(mask);
1645 
1646   B->assembled = PETSC_TRUE;
1647 
1648   ierr = MatView_Private(B); CHKERRQ(ierr);
1649   PetscFunctionReturn(0);
1650 }
1651 
1652 
1653 
1654