xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 9ea5d5aec97b6e040a49afd1b9ac5121faa7475f)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.2 1996/02/13 05:21:59 bsmith Exp bsmith $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n,*idx,i;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     n = a->n;
29     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
30     for ( i=0; i<n; i++ ) idx[i] = i;
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
32     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
33     PetscFree(idx);
34     ISSetPermutation(*rperm);
35     ISSetPermutation(*cperm);
36     ISSetIdentity(*rperm);
37     ISSetIdentity(*cperm);
38     return 0;
39   }
40 
41   ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,a->indexshift,&ia,&ja);CHKERRQ(ierr);
42   ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
43 
44   PetscFree(ia); PetscFree(ja);
45   return 0;
46 }
47 
48 
49 #include "draw.h"
50 #include "pinclude/pviewer.h"
51 #include "sysio.h"
52 
53 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
54 {
55   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
56   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l;
57   Scalar      *aa;
58 
59   ierr = ViewerFileGetDescriptor_Private(viewer,&fd); CHKERRQ(ierr);
60   col_lens = (int *) PetscMalloc( (4+a->m)*sizeof(int) ); CHKPTRQ(col_lens);
61   col_lens[0] = MAT_COOKIE;
62   col_lens[1] = a->m;
63   col_lens[2] = a->n;
64   col_lens[3] = a->nz*bs*bs;
65 
66   /* store lengths of each row and write (including header) to file */
67   count = 0;
68   for ( i=0; i<a->mbs; i++ ) {
69     for ( j=0; j<bs; j++ ) {
70       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
71     }
72   }
73   ierr = SYWrite(fd,col_lens,4+a->m,SYINT,1); CHKERRQ(ierr);
74   PetscFree(col_lens);
75 
76   /* store column indices (zero start index) */
77   jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj);
78   count = 0;
79   if (!a->indexshift) {
80     for ( i=0; i<a->mbs; i++ ) {
81       for ( j=0; j<bs; j++ ) {
82         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
83           for ( l=0; l<bs; l++ ) {
84             jj[count++] = bs*a->j[k] + l;
85 /* printf(" count %d jj %d row %d\n",count-1,jj[count-1],bs*i+j);*/
86           }
87         }
88       }
89     }
90   } else {
91     SETERRQ(1,"Not yet done");
92   }
93   ierr = SYWrite(fd,jj,bs*bs*a->nz,SYINT,0); CHKERRQ(ierr);
94   PetscFree(jj);
95 
96   /* store nonzero values */
97   aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa);
98   count = 0;
99   for ( i=0; i<a->mbs; i++ ) {
100     for ( j=0; j<bs; j++ ) {
101       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
102         for ( l=0; l<bs; l++ ) {
103           aa[count++] = a->a[bs*bs*k + l*bs + j];
104         }
105       }
106     }
107   }
108   ierr = SYWrite(fd,aa,bs*bs*a->nz,SYSCALAR,0); CHKERRQ(ierr);
109   PetscFree(aa);
110   return 0;
111 }
112 
113 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
114 {
115   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ *) A->data;
116   int         ierr, i,j, shift = a->indexshift,format,bs = a->bs,k,l;
117   FILE        *fd;
118   char        *outputname;
119 
120   ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
121   ierr = ViewerFileGetOutputname_Private(viewer,&outputname); CHKERRQ(ierr);
122   ierr = ViewerFileGetFormat_Private(viewer,&format);
123   if (format == FILE_FORMAT_INFO) {
124     /* no need to print additional information */ ;
125   }
126   else if (format == FILE_FORMAT_MATLAB) {
127     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
128   }
129   else {
130     for ( i=0; i<a->mbs; i++ ) {
131       for ( j=0; j<bs; j++ ) {
132         fprintf(fd,"row %d:",i*bs+j);
133         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
134           for ( l=0; l<bs; l++ ) {
135             fprintf(fd," %d %g",bs*a->j[k]+l-shift,a->a[bs*bs*k + l*bs + j]);
136           }
137         }
138         fprintf(fd,"\n");
139       }
140     }
141   }
142   fflush(fd);
143   return 0;
144 }
145 
146 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
147 {
148   Mat         A = (Mat) obj;
149   PetscObject vobj = (PetscObject) viewer;
150 
151   if (!viewer) {
152     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
153   }
154   if (vobj->cookie == VIEWER_COOKIE) {
155     if (vobj->type == MATLAB_VIEWER) {
156       SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
157     }
158     else if (vobj->type == ASCII_FILE_VIEWER||vobj->type == ASCII_FILES_VIEWER){
159       return MatView_SeqBAIJ_ASCII(A,viewer);
160     }
161     else if (vobj->type == BINARY_FILE_VIEWER) {
162       return MatView_SeqBAIJ_Binary(A,viewer);
163     }
164   }
165   else if (vobj->cookie == DRAW_COOKIE) {
166     if (vobj->type == NULLWINDOW) return 0;
167     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
168   }
169   return 0;
170 }
171 
172 
173 static int MatZeroEntries_SeqBAIJ(Mat A)
174 {
175   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
176   PetscMemzero(a->a,a->bs*a->bs*(a->i[a->mbs]+a->indexshift)*sizeof(Scalar));
177   return 0;
178 }
179 
180 int MatDestroy_SeqBAIJ(PetscObject obj)
181 {
182   Mat        A  = (Mat) obj;
183   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
184 
185 #if defined(PETSC_LOG)
186   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
187 #endif
188   PetscFree(a->a);
189   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
190   if (a->diag) PetscFree(a->diag);
191   if (a->ilen) PetscFree(a->ilen);
192   if (a->imax) PetscFree(a->imax);
193   if (a->solve_work) PetscFree(a->solve_work);
194   PetscFree(a);
195   PLogObjectDestroy(A);
196   PetscHeaderDestroy(A);
197   return 0;
198 }
199 
200 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
201 {
202   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
203   if      (op == ROW_ORIENTED)              a->roworiented = 1;
204   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
205   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
206   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
207   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
208   else if (op == ROWS_SORTED ||
209            op == SYMMETRIC_MATRIX ||
210            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
211            op == YES_NEW_DIAGONALS)
212     PLogInfo((PetscObject)A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
213   else if (op == NO_NEW_DIAGONALS)
214     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
215   else
216     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
217   return 0;
218 }
219 
220 
221 /* -------------------------------------------------------*/
222 /* Should check that shapes of vectors and matrices match */
223 /* -------------------------------------------------------*/
224 #include "pinclude/plapack.h"
225 
226 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
227 {
228   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
229   Scalar          *xg,*yg;
230   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
231   register Scalar x1,x2,x3,x4,x5;
232   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
233   int             bs = a->bs,j,n,bs2 = bs*bs;
234 
235   if (a->indexshift) SETERRQ(PETSC_ERR_SUP,"MatMult_SeqBAIJ:index shift no");
236 
237   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
238   PetscMemzero(y,m*sizeof(Scalar));
239   x     = x;
240   idx   = a->j;
241   v     = a->a;
242   ii    = a->i;
243 
244   switch (bs) {
245     case 1:
246      for ( i=0; i<m; i++ ) {
247        n    = ii[1] - ii[0]; ii++;
248        sum  = 0.0;
249        while (n--) sum += *v++ * x[*idx++];
250        y[i] = sum;
251       }
252       break;
253     case 2:
254       for ( i=0; i<mbs; i++ ) {
255         n  = ii[1] - ii[0]; ii++; /* number of blocks in row */
256         sum1 = 0.0; sum2 = 0.0;
257         for ( j=0; j<n; j++ ) {
258           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
259           sum1 += v[0]*x1 + v[2]*x2;
260           sum2 += v[1]*x1 + v[3]*x2;
261           v += 4;
262         }
263         y[0] += sum1; y[1] += sum2;
264         y += 2;
265       }
266       break;
267     case 3:
268       for ( i=0; i<mbs; i++ ) {
269         n  = ii[1] - ii[0]; ii++; /* number of blocks in row */
270         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
271         for ( j=0; j<n; j++ ) {
272           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
273           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
274           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
275           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
276           v += 9;
277         }
278         y[0] += sum1; y[1] += sum2; y[2] += sum3;
279         y += 3;
280       }
281       break;
282     case 4:
283       for ( i=0; i<mbs; i++ ) {
284         n  = ii[1] - ii[0]; ii++; /* number of blocks in row */
285         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
286         for ( j=0; j<n; j++ ) {
287           xb = x + 4*(*idx++);
288           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
289           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
290           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
291           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
292           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
293           v += 16;
294         }
295         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
296         y += 4;
297       }
298       break;
299     case 5:
300       for ( i=0; i<mbs; i++ ) {
301         n  = ii[1] - ii[0]; ii++; /* number of blocks in row */
302         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
303         for ( j=0; j<n; j++ ) {
304           xb = x + 5*(*idx++);
305           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
306           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
307           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
308           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
309           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
310           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
311           v += 25;
312         }
313         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
314         y += 5;
315       }
316       break;
317       /* block sizes larger then 5 by 5 are handled by BLAS */
318     default: {
319       int  _One = 1; Scalar _DOne = 1.0;
320       for ( i=0; i<mbs; i++ ) {
321         n    = ii[1] - ii[0]; ii++; /* number of blocks in row */
322         for ( j=0; j<n; j++ ) {
323           xb = x + bs*(*idx++);
324           /* do dense mat-vec product */
325           LAgemv_("N",&bs,&bs,&_DOne,v,&bs,xb,&_One,&_DOne,y,&_One);
326           v += bs2;
327         }
328         y += bs;
329       }
330     }
331   }
332   PLogFlops(2*bs2*a->nz - m);
333   return 0;
334 }
335 
336 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nzalloc,int *mem)
337 {
338   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
339   *nz      = a->bs*a->bs*a->nz;
340   *nzalloc = a->maxnz;
341   *mem     = (int)A->mem;
342   return 0;
343 }
344 
345 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
346 extern int MatLUFactorNumeric_SeqBAIJ(Mat,Mat*);
347 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
348 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
349 
350 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
351 {
352   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
353   *m = a->m; *n = a->n;
354   return 0;
355 }
356 
357 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
358 {
359   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
360   *m = 0; *n = a->m;
361   return 0;
362 }
363 
364 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
365 {
366   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
367   Scalar      *v = a->a;
368   double      sum = 0.0;
369   int         i;
370 
371   if (type == NORM_FROBENIUS) {
372     for (i=0; i<a->nz; i++ ) {
373 #if defined(PETSC_COMPLEX)
374       sum += real(conj(*v)*(*v)); v++;
375 #else
376       sum += (*v)*(*v); v++;
377 #endif
378     }
379     *norm = sqrt(sum);
380   }
381   else {
382     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
383   }
384   return 0;
385 }
386 
387 
388 /*
389      note: This can only work for identity for row and col. It would
390    be good to check this and otherwise generate an error.
391 */
392 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
393 {
394   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
395   int         ierr;
396   Mat         outA;
397 
398   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
399 
400   outA          = inA;
401   inA->factor   = FACTOR_LU;
402   a->row        = row;
403   a->col        = col;
404 
405   a->solve_work = (Scalar *) PetscMalloc((a->m+1)*sizeof(Scalar));CHKPTRQ(a->solve_work);
406 
407 /*
408   if (!a->diag) {
409     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
410   }
411   ierr = MatLUFactorNumeric_SeqBAIJ(inA,&outA); CHKERRQ(ierr);
412 */
413   return 0;
414 }
415 
416 #include "pinclude/plapack.h"
417 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
418 {
419   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
420   int         one = 1, totalnz = a->bs*a->bs*a->nz;
421   BLscal_( &totalnz, alpha, a->a, &one );
422   PLogFlops(totalnz);
423   return 0;
424 }
425 
426 int MatPrintHelp_SeqBAIJ(Mat A)
427 {
428   static int called = 0;
429   MPI_Comm   comm = A->comm;
430 
431   if (called) return 0; else called = 1;
432   MPIU_printf(comm," Options for MATSeqBAIJ and MATMPIAIJ matrix formats (the defaults):\n");
433   MPIU_printf(comm,"  -mat_lu_pivotthreshold <threshold>\n");
434   MPIU_printf(comm,"  -mat_aij_oneindex - internal indices begin at 1 instead of the default 0.\n");
435   return 0;
436 }
437 /* -------------------------------------------------------------------*/
438 static struct _MatOps MatOps = {0,
439        0,0,
440        MatMult_SeqBAIJ,0,
441        0,0,
442        /*MatSolve_SeqBAIJ*/ 0,0,
443        0,0,
444        /*MatLUFactor_SeqBAIJ*/0,0,
445        0,
446        0,
447        MatGetInfo_SeqBAIJ,0,
448        0,0,MatNorm_SeqBAIJ,
449        0,0,
450        0,
451        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
452        MatGetReordering_SeqBAIJ,
453        /*MatLUFactorSymbolic_SeqBAIJ */0,/* MatLUFactorNumeric_SeqBAIJ*/ 0,0,0,
454        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
455        /* MatILUFactorSymbolic_SeqBAIJ */ 0,0,
456        0,0,/*MatConvert_SeqBAIJ*/ 0,
457        0,0,
458        MatConvertSameType_SeqBAIJ,0,0,
459        MatILUFactor_SeqBAIJ,0,0,
460        0,0,
461        0,0,
462        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
463        0};
464 
465 /*@C
466    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
467    (the default parallel PETSc format).  For good matrix assembly performance
468    the user should preallocate the matrix storage by setting the parameter nz
469    (or nzz).  By setting these parameters accurately, performance can be
470    increased by more than a factor of 50.
471 
472    Input Parameters:
473 .  comm - MPI communicator, set to MPI_COMM_SELF
474 .  bs - size of block
475 .  m - number of rows
476 .  n - number of columns
477 .  nz - number of block nonzeros per block row (same for all rows)
478 .  nzz - number of block nonzeros per block row or PETSC_NULL
479          (possibly different for each row)
480 
481    Output Parameter:
482 .  A - the matrix
483 
484    Notes:
485    The BAIJ format, is fully compatible with standard Fortran 77
486    storage.  That is, the stored row and column indices can begin at
487    either one (as in Fortran) or zero.  See the users manual for details.
488 
489    Specify the preallocated storage with either nz or nnz (not both).
490    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
491    allocation.  For additional details, see the users manual chapter on
492    matrices and the file $(PETSC_DIR)/Performance.
493 
494 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
495 @*/
496 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
497 {
498   Mat         B;
499   Mat_SeqBAIJ *b;
500   int         i,len,ierr, flg,mbs = m/bs;
501 
502   if (mbs*bs != m)
503     SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize");
504 
505   *A      = 0;
506   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
507   PLogObjectCreate(B);
508   B->data             = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
509   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
510   B->destroy          = MatDestroy_SeqBAIJ;
511   B->view             = MatView_SeqBAIJ;
512   B->factor           = 0;
513   B->lupivotthreshold = 1.0;
514   ierr = OptionsGetDouble(PETSC_NULL,"-mat_lu_pivotthreshold",&B->lupivotthreshold, \
515                           &flg); CHKERRQ(ierr);
516   b->row              = 0;
517   b->col              = 0;
518   b->indexshift       = 0;
519   b->reallocs         = 0;
520   ierr = OptionsHasName(PETSC_NULL,"-mat_aij_oneindex", &flg); CHKERRQ(ierr);
521   if (flg) b->indexshift = -1;
522 
523   b->m       = m;
524   b->mbs     = mbs;
525   b->n       = n;
526   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
527   if (nnz == PETSC_NULL) {
528     if (nz == PETSC_DEFAULT) nz = 10;
529     else if (nz <= 0)        nz = 1;
530     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
531     nz = nz*mbs;
532   }
533   else {
534     nz = 0;
535     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
536   }
537 
538   /* allocate the matrix space */
539   len   = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int);
540   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
541   PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar));
542   b->j  = (int *) (b->a + nz*bs*bs);
543   PetscMemzero(b->j,nz*sizeof(int));
544   b->i  = b->j + nz;
545   b->singlemalloc = 1;
546 
547   b->i[0] = -b->indexshift;
548   for (i=1; i<mbs+1; i++) {
549     b->i[i] = b->i[i-1] + b->imax[i-1];
550   }
551 
552   /* b->ilen will count nonzeros in each block row so far. */
553   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
554   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
555   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
556 
557   b->bs               = bs;
558   b->mbs              = mbs;
559   b->nz               = 0;
560   b->maxnz            = nz;
561   b->sorted           = 0;
562   b->roworiented      = 1;
563   b->nonew            = 0;
564   b->diag             = 0;
565   b->solve_work       = 0;
566   b->spptr            = 0;
567 
568   *A = B;
569   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
570   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
571   return 0;
572 }
573 
574 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
575 {
576   Mat         C;
577   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
578   int         i,len, shift = a->indexshift, mbs = a->mbs, bs = a->bs;
579 
580   *B = 0;
581   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
582   PLogObjectCreate(C);
583   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
584   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
585   C->destroy    = MatDestroy_SeqBAIJ;
586   C->view       = MatView_SeqBAIJ;
587   C->factor     = A->factor;
588   c->row        = 0;
589   c->col        = 0;
590   c->indexshift = shift;
591   C->assembled  = PETSC_TRUE;
592 
593   c->m          = a->m;
594   c->n          = a->n;
595   c->bs         = a->bs;
596   c->mbs        = a->mbs;
597   c->nbs        = a->nbs;
598 
599   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
600   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
601   for ( i=0; i<mbs; i++ ) {
602     c->imax[i] = a->imax[i];
603     c->ilen[i] = a->ilen[i];
604   }
605 
606   /* allocate the matrix space */
607   c->singlemalloc = 1;
608   len   = (mbs+1)*sizeof(int)+(a->i[mbs])*(bs*bs*sizeof(Scalar)+sizeof(int));
609   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
610   c->j  = (int *) (c->a + a->i[mbs]*bs*bs + shift);
611   c->i  = c->j + a->i[mbs] + shift;
612   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
613   if (mbs > 0) {
614     PetscMemcpy(c->j,a->j,(a->i[mbs]+shift)*sizeof(int));
615     if (cpvalues == COPY_VALUES) {
616       PetscMemcpy(c->a,a->a,(bs*bs*a->i[mbs]+shift)*sizeof(Scalar));
617     }
618   }
619 
620   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
621   c->sorted      = a->sorted;
622   c->roworiented = a->roworiented;
623   c->nonew       = a->nonew;
624 
625   if (a->diag) {
626     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
627     PLogObjectMemory(C,(mbs+1)*sizeof(int));
628     for ( i=0; i<mbs; i++ ) {
629       c->diag[i] = a->diag[i];
630     }
631   }
632   else c->diag          = 0;
633   c->nz                 = a->nz;
634   c->maxnz              = a->maxnz;
635   c->solve_work         = 0;
636   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
637 
638   *B = C;
639   return 0;
640 }
641 
642 int MatLoad_SeqBAIJ(Viewer bview,MatType type,Mat *A)
643 {
644   Mat_SeqBAIJ  *a;
645   Mat          B;
646   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,shift,bs=1,flg;
647   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
648   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
649   int          *masked, nmask,tmp;
650   Scalar       *aa;
651   PetscObject  vobj = (PetscObject) bview;
652   MPI_Comm     comm = vobj->comm;
653 
654   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
655 
656   MPI_Comm_size(comm,&size);
657   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
658   ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
659   ierr = SYRead(fd,header,4,SYINT); CHKERRQ(ierr);
660   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object in file");
661   M = header[1]; N = header[2]; nz = header[3];
662 
663   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
664 
665   /*
666      This code adds extra rows to make sure the number of rows is
667     divisible by the blocksize
668   */
669   mbs        = M/bs;
670   extra_rows = bs - M + bs*(mbs);
671   if (extra_rows == bs) extra_rows = 0;
672   else                  mbs++;
673   if (extra_rows) {
674     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
675   }
676 
677   /* read in row lengths */
678   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
679   ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
680   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
681 
682   /* read in column indices */
683   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
684   ierr = SYRead(fd,jj,nz,SYINT); CHKERRQ(ierr);
685   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
686 
687   /* loop over row lengths determining block row lengths */
688   browlengths = (int *) PetscMalloc( mbs*sizeof(int) );CHKPTRQ(browlengths);
689   PetscMemzero(browlengths,mbs*sizeof(int));
690   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
691   PetscMemzero(mask,mbs*sizeof(int));
692   masked = mask + mbs;
693   rowcount = 0; nzcount = 0;
694   for ( i=0; i<mbs; i++ ) {
695     nmask = 0;
696     for ( j=0; j<bs; j++ ) {
697       kmax = rowlengths[rowcount];
698       for ( k=0; k<kmax; k++ ) {
699         tmp = jj[nzcount++]/bs;
700         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
701       }
702       rowcount++;
703     }
704     browlengths[i] += nmask;
705     /* zero out the mask elements we set */
706     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
707   }
708 
709   /* create our matrix */
710   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
711          CHKERRQ(ierr);
712   B = *A;
713   a = (Mat_SeqBAIJ *) B->data;
714   shift = a->indexshift;
715 
716   /* set matrix "i" values */
717   a->i[0] = -shift;
718   for ( i=1; i<= mbs; i++ ) {
719     a->i[i]      = a->i[i-1] + browlengths[i-1];
720     a->ilen[i-1] = browlengths[i-1];
721   }
722   a->nz = 0;
723   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
724 
725   /* read in nonzero values */
726   aa = (Scalar *) PetscMalloc( (nz+extra_rows)*sizeof(Scalar) ); CHKPTRQ(aa);
727   ierr = SYRead(fd,aa,nz,SYSCALAR); CHKERRQ(ierr);
728   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
729 
730   /* set "a" and "j" values into matrix */
731   nzcount = 0; jcount = 0;
732   for ( i=0; i<mbs; i++ ) {
733     nzcountb = nzcount;
734     nmask    = 0;
735     for ( j=0; j<bs; j++ ) {
736       kmax = rowlengths[i*bs+j];
737       for ( k=0; k<kmax; k++ ) {
738         tmp = jj[nzcount++]/bs;
739 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
740       }
741       rowcount++;
742     }
743     /* set "j" values into matrix */
744     maskcount = 1;
745     for ( j=0; j<nmask; j++ ) {
746       a->j[jcount++]  = masked[j];
747       mask[masked[j]] = maskcount++; /* what nonzero block in this row is j */
748     }
749     /* set "a" values into matrix */
750     for ( j=0; j<bs; j++ ) {
751       kmax = rowlengths[i*bs+j];
752       for ( k=0; k<kmax; k++ ) {
753         block  = mask[jj[nzcountb]/bs] - 1;
754         point  = jj[nzcountb] - bs*(jj[nzcountb]/bs);
755         idx = bs*bs*(a->i[i] + block) + j + bs*(point);
756 	/* printf("block row %d subrow %d cold %d block %d idx %d val %g a->i[i] %d point %d\n",i,j,k,block,idx,
757 aa[nzcountb],a->i[i],point); */
758         a->a[idx] = aa[nzcountb++];
759       }
760     }
761     /* zero out the mask elements we set */
762     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
763   }
764   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
765 
766   PetscFree(rowlengths);
767   PetscFree(browlengths);
768   PetscFree(aa);
769   PetscFree(jj);
770   PetscFree(mask);
771 
772   B->assembled = PETSC_TRUE;
773   return 0;
774 }
775 
776 
777 
778