xref: /petsc/src/mat/impls/baij/seq/baij.c (revision b649020645e801c5fe4279b2bd3c4f27e4cbcaae)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.1 1996/02/09 21:28:31 bsmith Exp bsmith $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n,*idx,i;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     n = a->n;
29     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
30     for ( i=0; i<n; i++ ) idx[i] = i;
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
32     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
33     PetscFree(idx);
34     ISSetPermutation(*rperm);
35     ISSetPermutation(*cperm);
36     ISSetIdentity(*rperm);
37     ISSetIdentity(*cperm);
38     return 0;
39   }
40 
41   ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,a->indexshift,&ia,&ja);CHKERRQ(ierr);
42   ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
43 
44   PetscFree(ia); PetscFree(ja);
45   return 0;
46 }
47 
48 
49 #include "draw.h"
50 #include "pinclude/pviewer.h"
51 #include "sysio.h"
52 
53 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
54 {
55   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
56   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l;
57   Scalar      *aa;
58 
59   ierr = ViewerFileGetDescriptor_Private(viewer,&fd); CHKERRQ(ierr);
60   col_lens = (int *) PetscMalloc( (4+a->m)*sizeof(int) ); CHKPTRQ(col_lens);
61   col_lens[0] = MAT_COOKIE;
62   col_lens[1] = a->m;
63   col_lens[2] = a->n;
64   col_lens[3] = a->nz*bs*bs;
65 
66   /* store lengths of each row and write (including header) to file */
67   count = 0;
68   for ( i=0; i<a->mbs; i++ ) {
69     for ( j=0; j<bs; j++ ) {
70       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
71     }
72   }
73   ierr = SYWrite(fd,col_lens,4+a->m,SYINT,1); CHKERRQ(ierr);
74   PetscFree(col_lens);
75 
76   /* store column indices (zero start index) */
77   jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj);
78   count = 0;
79   if (!a->indexshift) {
80     for ( i=0; i<a->mbs; i++ ) {
81       for ( j=0; j<bs; j++ ) {
82         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
83           for ( l=0; l<bs; l++ ) {
84             jj[count++] = bs*a->j[k] + l;
85 /* printf(" count %d jj %d row %d\n",count-1,jj[count-1],bs*i+j);*/
86           }
87         }
88       }
89     }
90   } else {
91     SETERRQ(1,"Not yet done");
92   }
93   ierr = SYWrite(fd,jj,bs*bs*a->nz,SYINT,0); CHKERRQ(ierr);
94   PetscFree(jj);
95 
96   /* store nonzero values */
97   aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa);
98   count = 0;
99   for ( i=0; i<a->mbs; i++ ) {
100     for ( j=0; j<bs; j++ ) {
101       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
102         for ( l=0; l<bs; l++ ) {
103           aa[count++] = a->a[bs*bs*k + l*bs + j];
104         }
105       }
106     }
107   }
108   ierr = SYWrite(fd,aa,bs*bs*a->nz,SYSCALAR,0); CHKERRQ(ierr);
109   PetscFree(aa);
110   return 0;
111 }
112 
113 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
114 {
115   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ *) A->data;
116   int         ierr, i,j, shift = a->indexshift,format,bs = a->bs,k,l;
117   FILE        *fd;
118   char        *outputname;
119 
120   ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
121   ierr = ViewerFileGetOutputname_Private(viewer,&outputname); CHKERRQ(ierr);
122   ierr = ViewerFileGetFormat_Private(viewer,&format);
123   if (format == FILE_FORMAT_INFO) {
124     /* no need to print additional information */ ;
125   }
126   else if (format == FILE_FORMAT_MATLAB) {
127     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
128   }
129   else {
130     for ( i=0; i<a->mbs; i++ ) {
131       for ( j=0; j<bs; j++ ) {
132         fprintf(fd,"row %d:",i*bs+j);
133         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
134           for ( l=0; l<bs; l++ ) {
135             fprintf(fd," %d %g",bs*a->j[k]+l-shift,a->a[bs*bs*k + l*bs + j]);
136           }
137         }
138         fprintf(fd,"\n");
139       }
140     }
141   }
142   fflush(fd);
143   return 0;
144 }
145 
146 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
147 {
148   Mat         A = (Mat) obj;
149   PetscObject vobj = (PetscObject) viewer;
150 
151   if (!viewer) {
152     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
153   }
154   if (vobj->cookie == VIEWER_COOKIE) {
155     if (vobj->type == MATLAB_VIEWER) {
156       SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
157     }
158     else if (vobj->type == ASCII_FILE_VIEWER||vobj->type == ASCII_FILES_VIEWER){
159       return MatView_SeqBAIJ_ASCII(A,viewer);
160     }
161     else if (vobj->type == BINARY_FILE_VIEWER) {
162       return MatView_SeqBAIJ_Binary(A,viewer);
163     }
164   }
165   else if (vobj->cookie == DRAW_COOKIE) {
166     if (vobj->type == NULLWINDOW) return 0;
167     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
168   }
169   return 0;
170 }
171 
172 
173 static int MatZeroEntries_SeqBAIJ(Mat A)
174 {
175   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
176   PetscMemzero(a->a,a->bs*a->bs*(a->i[a->mbs]+a->indexshift)*sizeof(Scalar));
177   return 0;
178 }
179 
180 int MatDestroy_SeqBAIJ(PetscObject obj)
181 {
182   Mat        A  = (Mat) obj;
183   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
184 
185 #if defined(PETSC_LOG)
186   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
187 #endif
188   PetscFree(a->a);
189   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
190   if (a->diag) PetscFree(a->diag);
191   if (a->ilen) PetscFree(a->ilen);
192   if (a->imax) PetscFree(a->imax);
193   if (a->solve_work) PetscFree(a->solve_work);
194   PetscFree(a);
195   PLogObjectDestroy(A);
196   PetscHeaderDestroy(A);
197   return 0;
198 }
199 
200 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
201 {
202   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
203   if      (op == ROW_ORIENTED)              a->roworiented = 1;
204   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
205   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
206   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
207   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
208   else if (op == ROWS_SORTED ||
209            op == SYMMETRIC_MATRIX ||
210            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
211            op == YES_NEW_DIAGONALS)
212     PLogInfo((PetscObject)A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
213   else if (op == NO_NEW_DIAGONALS)
214     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
215   else
216     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
217   return 0;
218 }
219 
220 
221 /* -------------------------------------------------------*/
222 /* Should check that shapes of vectors and matrices match */
223 /* -------------------------------------------------------*/
224 #include "pinclude/plapack.h"
225 
226 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
227 {
228   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
229   Scalar          *xg,*yg;
230   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
231   register Scalar x1,x2,x3,x4,x5;
232   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
233   int             bs = a->bs,j,n,bs2 = bs*bs;
234 
235   if (a->indexshift) SETERRQ(PETSC_ERR_SUP,"MatMult_SeqBAIJ:index shift no");
236 
237   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
238   PetscMemzero(y,m*sizeof(Scalar));
239   x     = x;
240   idx   = a->j;
241   v     = a->a;
242   ii    = a->i;
243 
244   switch (bs) {
245     case 1:
246      for ( i=0; i<m; i++ ) {
247        n    = ii[1] - ii[0]; ii++;
248        sum  = 0.0;
249        while (n--) sum += *v++ * x[*idx++];
250        y[i] = sum;
251       }
252       break;
253     case 2:
254       for ( i=0; i<mbs; i++ ) {
255         n  = ii[1] - ii[0]; ii++; /* number of blocks in row */
256         sum1 = 0.0; sum2 = 0.0;
257         for ( j=0; j<n; j++ ) {
258           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
259           sum1 += v[0]*x1 + v[2]*x2;
260           sum2 += v[1]*x1 + v[3]*x2;
261           v += 4;
262         }
263         y[0] += sum1; y[1] += sum2;
264         y += 2;
265       }
266       break;
267     case 3:
268       for ( i=0; i<mbs; i++ ) {
269         n  = ii[1] - ii[0]; ii++; /* number of blocks in row */
270         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
271         for ( j=0; j<n; j++ ) {
272           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
273           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
274           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
275           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
276           v += 9;
277         }
278         y[0] += sum1; y[1] += sum2; y[2] += sum3;
279         y += 3;
280       }
281       break;
282     case 4:
283       for ( i=0; i<mbs; i++ ) {
284         n  = ii[1] - ii[0]; ii++; /* number of blocks in row */
285         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
286         for ( j=0; j<n; j++ ) {
287           xb = x + 4*(*idx++);
288           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
289           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
290           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
291           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
292           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
293           v += 16;
294         }
295         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
296         y += 4;
297       }
298       break;
299     case 5:
300       for ( i=0; i<mbs; i++ ) {
301         n  = ii[1] - ii[0]; ii++; /* number of blocks in row */
302         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
303         for ( j=0; j<n; j++ ) {
304           xb = x + 5*(*idx++);
305           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
306           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
307           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
308           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
309           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
310           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
311           v += 25;
312         }
313         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
314         y += 5;
315       }
316       break;
317       /* block sizes larger then 5 by 5 are handled by BLAS */
318     default: {
319       int  _One = 1; Scalar _DOne = 1.0;
320       for ( i=0; i<mbs; i++ ) {
321         n    = ii[1] - ii[0]; ii++; /* number of blocks in row */
322         for ( j=0; j<n; j++ ) {
323           xb = x + bs*(*idx++);
324           /* do dense mat-vec product */
325           LAgemv_("N",&bs,&bs,&_DOne,v,&bs,xb,&_One,&_DOne,y,&_One);
326           v += bs2;
327         }
328         y += bs;
329       }
330     }
331   }
332   PLogFlops(2*bs2*a->nz - m);
333   return 0;
334 }
335 
336 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nzalloc,int *mem)
337 {
338   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
339   *nz      = a->nz;
340   *nzalloc = a->maxnz;
341   *mem     = (int)A->mem;
342   return 0;
343 }
344 
345 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
346 extern int MatLUFactorNumeric_SeqBAIJ(Mat,Mat*);
347 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
348 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
349 
350 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
351 {
352   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
353   *m = a->m; *n = a->n;
354   return 0;
355 }
356 
357 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
358 {
359   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
360   *m = 0; *n = a->m;
361   return 0;
362 }
363 
364 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
365 {
366   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
367   Scalar      *v = a->a;
368   double      sum = 0.0;
369   int         i;
370 
371   if (type == NORM_FROBENIUS) {
372     for (i=0; i<a->nz; i++ ) {
373 #if defined(PETSC_COMPLEX)
374       sum += real(conj(*v)*(*v)); v++;
375 #else
376       sum += (*v)*(*v); v++;
377 #endif
378     }
379     *norm = sqrt(sum);
380   }
381   else {
382     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
383   }
384   return 0;
385 }
386 
387 
388 /*
389      note: This can only work for identity for row and col. It would
390    be good to check this and otherwise generate an error.
391 */
392 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
393 {
394   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
395   int         ierr;
396   Mat         outA;
397 
398   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
399 
400   outA          = inA;
401   inA->factor   = FACTOR_LU;
402   a->row        = row;
403   a->col        = col;
404 
405   a->solve_work = (Scalar *) PetscMalloc((a->m+1)*sizeof(Scalar));CHKPTRQ(a->solve_work);
406 
407 /*
408   if (!a->diag) {
409     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
410   }
411   ierr = MatLUFactorNumeric_SeqBAIJ(inA,&outA); CHKERRQ(ierr);
412 */
413   return 0;
414 }
415 
416 #include "pinclude/plapack.h"
417 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
418 {
419   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
420   int         one = 1, totalnz = a->bs*a->bs*a->nz;
421   BLscal_( &totalnz, alpha, a->a, &one );
422   PLogFlops(totalnz);
423   return 0;
424 }
425 
426 int MatPrintHelp_SeqBAIJ(Mat A)
427 {
428   static int called = 0;
429   MPI_Comm   comm = A->comm;
430 
431   if (called) return 0; else called = 1;
432   MPIU_printf(comm," Options for MATSeqBAIJ and MATMPIAIJ matrix formats (the defaults):\n");
433   MPIU_printf(comm,"  -mat_lu_pivotthreshold <threshold>\n");
434   MPIU_printf(comm,"  -mat_aij_oneindex - internal indices begin at 1 instead of the default 0.\n");
435   return 0;
436 }
437 /* -------------------------------------------------------------------*/
438 static struct _MatOps MatOps = {0,
439        0,0,
440        MatMult_SeqBAIJ,0,
441        0,0,
442        /*MatSolve_SeqBAIJ*/ 0,0,
443        0,0,
444        /*MatLUFactor_SeqBAIJ*/0,0,
445        0,
446        0,
447        MatGetInfo_SeqBAIJ,0,
448        0,0,MatNorm_SeqBAIJ,
449        0,0,
450        0,
451        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
452        MatGetReordering_SeqBAIJ,
453        /*MatLUFactorSymbolic_SeqBAIJ */0,/* MatLUFactorNumeric_SeqBAIJ*/ 0,0,0,
454        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
455        /* MatILUFactorSymbolic_SeqBAIJ */ 0,0,
456        0,0,/*MatConvert_SeqBAIJ*/ 0,
457        0,0,
458        MatConvertSameType_SeqBAIJ,0,0,
459        MatILUFactor_SeqBAIJ,0,0,
460        0,0,
461        0,0,
462        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
463        0};
464 
465 /*@C
466    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
467    (the default parallel PETSc format).  For good matrix assembly performance
468    the user should preallocate the matrix storage by setting the parameter nz
469    (or nzz).  By setting these parameters accurately, performance can be
470    increased by more than a factor of 50.
471 
472    Input Parameters:
473 .  comm - MPI communicator, set to MPI_COMM_SELF
474 .  bs - size of block
475 .  m - number of rows
476 .  n - number of columns
477 .  nz - number of block nonzeros per block row (same for all rows)
478 .  nzz - number of block nonzeros per block row or PETSC_NULL
479          (possibly different for each row)
480 
481    Output Parameter:
482 .  A - the matrix
483 
484    Notes:
485    The BAIJ format, is fully compatible with standard Fortran 77
486    storage.  That is, the stored row and column indices can begin at
487    either one (as in Fortran) or zero.  See the users manual for details.
488 
489    Specify the preallocated storage with either nz or nnz (not both).
490    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
491    allocation.  For additional details, see the users manual chapter on
492    matrices and the file $(PETSC_DIR)/Performance.
493 
494 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
495 @*/
496 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
497 {
498   Mat         B;
499   Mat_SeqBAIJ *b;
500   int         i,len,ierr, flg,mbs = m/bs;
501 
502   if (mbs*bs != m)
503     SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize");
504 
505   *A      = 0;
506   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
507   PLogObjectCreate(B);
508   B->data             = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
509   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
510   B->destroy          = MatDestroy_SeqBAIJ;
511   B->view             = MatView_SeqBAIJ;
512   B->factor           = 0;
513   B->lupivotthreshold = 1.0;
514   ierr = OptionsGetDouble(PETSC_NULL,"-mat_lu_pivotthreshold",&B->lupivotthreshold, \
515                           &flg); CHKERRQ(ierr);
516   b->row              = 0;
517   b->col              = 0;
518   b->indexshift       = 0;
519   b->reallocs         = 0;
520   ierr = OptionsHasName(PETSC_NULL,"-mat_aij_oneindex", &flg); CHKERRQ(ierr);
521   if (flg) b->indexshift = -1;
522 
523   b->m       = m;
524   b->mbs     = mbs;
525   b->n       = n;
526   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
527   if (nnz == PETSC_NULL) {
528     if (nz == PETSC_DEFAULT) nz = 10;
529     else if (nz <= 0)        nz = 1;
530     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
531     nz = nz*mbs;
532   }
533   else {
534     nz = 0;
535     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
536   }
537 
538   /* allocate the matrix space */
539   len   = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int);
540   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
541   PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar));
542   b->j  = (int *) (b->a + nz*bs*bs);
543   PetscMemzero(b->j,nz*sizeof(int));
544   b->i  = b->j + nz;
545   b->singlemalloc = 1;
546 
547   b->i[0] = -b->indexshift;
548   for (i=1; i<mbs+1; i++) {
549     b->i[i] = b->i[i-1] + b->imax[i-1];
550   }
551 
552   /* b->ilen will count nonzeros in each block row so far. */
553   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
554   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
555   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
556 
557   b->bs               = bs;
558   b->mbs              = mbs;
559   b->nz               = 0;
560   b->maxnz            = nz;
561   b->sorted           = 0;
562   b->roworiented      = 1;
563   b->nonew            = 0;
564   b->diag             = 0;
565   b->solve_work       = 0;
566   b->spptr            = 0;
567 
568   *A = B;
569   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
570   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
571   return 0;
572 }
573 
574 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
575 {
576   Mat         C;
577   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
578   int         i,len, shift = a->indexshift, mbs = a->mbs, bs = a->bs;
579 
580   *B = 0;
581   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
582   PLogObjectCreate(C);
583   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
584   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
585   C->destroy    = MatDestroy_SeqBAIJ;
586   C->view       = MatView_SeqBAIJ;
587   C->factor     = A->factor;
588   c->row        = 0;
589   c->col        = 0;
590   c->indexshift = shift;
591   C->assembled  = PETSC_TRUE;
592 
593   c->m          = a->m;
594   c->n          = a->n;
595   c->bs         = a->bs;
596   c->mbs        = a->mbs;
597   c->nbs        = a->nbs;
598 
599   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
600   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
601   for ( i=0; i<mbs; i++ ) {
602     c->imax[i] = a->imax[i];
603     c->ilen[i] = a->ilen[i];
604   }
605 
606   /* allocate the matrix space */
607   c->singlemalloc = 1;
608   len   = (mbs+1)*sizeof(int)+(a->i[mbs])*(bs*bs*sizeof(Scalar)+sizeof(int));
609   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
610   c->j  = (int *) (c->a + a->i[mbs]*bs*bs + shift);
611   c->i  = c->j + a->i[mbs] + shift;
612   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
613   if (mbs > 0) {
614     PetscMemcpy(c->j,a->j,(a->i[mbs]+shift)*sizeof(int));
615     if (cpvalues == COPY_VALUES) {
616       PetscMemcpy(c->a,a->a,(bs*bs*a->i[mbs]+shift)*sizeof(Scalar));
617     }
618   }
619 
620   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
621   c->sorted      = a->sorted;
622   c->roworiented = a->roworiented;
623   c->nonew       = a->nonew;
624 
625   if (a->diag) {
626     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
627     PLogObjectMemory(C,(mbs+1)*sizeof(int));
628     for ( i=0; i<mbs; i++ ) {
629       c->diag[i] = a->diag[i];
630     }
631   }
632   else c->diag          = 0;
633   c->nz                 = a->nz;
634   c->maxnz              = a->maxnz;
635   c->solve_work         = 0;
636   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
637 
638   *B = C;
639   return 0;
640 }
641 
642 int MatLoad_SeqBAIJ(Viewer bview,MatType type,Mat *A)
643 {
644   Mat_SeqBAIJ  *a;
645   Mat          B;
646   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,shift,bs=1,flg;
647   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
648   int          kmax,jcount,block,idx,point,nzcountb;
649   Scalar       *aa;
650   PetscObject  vobj = (PetscObject) bview;
651   MPI_Comm     comm = vobj->comm;
652 
653   ierr = OptionsGetInt(PETSC_NULL,"-mat_baij_bs",&bs,&flg);CHKERRQ(ierr);
654 
655   MPI_Comm_size(comm,&size);
656   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
657   ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
658   ierr = SYRead(fd,header,4,SYINT); CHKERRQ(ierr);
659   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object in file");
660   M = header[1]; N = header[2]; nz = header[3];
661 
662   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
663   mbs = M/bs;
664   if (bs*mbs != M) SETERRQ(1,"MatLoad_SeqBAIJ:Rows not divisble by blocksize");
665 
666   /* read in row lengths */
667   rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
668   ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
669 
670   /* read in column indices */
671   jj = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(jj);
672   ierr = SYRead(fd,jj,nz,SYINT); CHKERRQ(ierr);
673 
674   /* loop over row lengths determining block row lengths */
675   browlengths = (int *) PetscMalloc( mbs*sizeof(int) );CHKPTRQ(browlengths);
676   PetscMemzero(browlengths,mbs*sizeof(int));
677   mask  = (int *) PetscMalloc( mbs*sizeof(int) ); CHKPTRQ(mask);
678   rowcount = 0; nzcount = 0;
679   for ( i=0; i<mbs; i++ ) {
680     PetscMemzero(mask,mbs*sizeof(int));
681     for ( j=0; j<bs; j++ ) {
682       kmax = rowlengths[rowcount];
683       for ( k=0; k<kmax; k++ ) {
684         mask[jj[nzcount++]/bs] = 1;
685       }
686       rowcount++;
687     }
688     for ( j=0; j<mbs; j++ ) {
689       if (mask[j]) browlengths[i]++;
690     }
691   }
692 
693   /* create our matrix */
694   ierr = MatCreateSeqBAIJ(comm,bs,M,N,0,browlengths,A); CHKERRQ(ierr);
695   B = *A;
696   a = (Mat_SeqBAIJ *) B->data;
697   shift = a->indexshift;
698 
699   /* set matrix "i" values */
700   a->i[0] = -shift;
701   for ( i=1; i<= mbs; i++ ) {
702     a->i[i]      = a->i[i-1] + browlengths[i-1];
703     a->ilen[i-1] = browlengths[i-1];
704   }
705   a->nz = 0;
706   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
707 
708   /* read in nonzero values */
709   aa = (Scalar *) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(aa);
710   ierr = SYRead(fd,aa,nz,SYSCALAR); CHKERRQ(ierr);
711 
712   /* set "a" and "j" values into matrix */
713   nzcount = 0; jcount = 0;
714   for ( i=0; i<mbs; i++ ) {
715     nzcountb = nzcount;
716     PetscMemzero(mask,mbs*sizeof(int));
717     for ( j=0; j<bs; j++ ) {
718       kmax = rowlengths[i*bs+j];
719       for ( k=0; k<kmax; k++ ) {
720         mask[jj[nzcount++]/bs] = 1;
721       }
722       rowcount++;
723     }
724     /* set "j" values into matrix */
725     maskcount = 1;
726     for ( j=0; j<mbs; j++ ) {
727       if (mask[j]) {
728         a->j[jcount++] = j;
729         mask[j] = maskcount++; /* what nonzero block in this row is j */
730       }
731     }
732     /* set "a" values into matrix */
733     for ( j=0; j<bs; j++ ) {
734       kmax = rowlengths[i*bs+j];
735       for ( k=0; k<kmax; k++ ) {
736         block  = mask[jj[nzcountb]/bs] - 1;
737         point  = jj[nzcountb] - bs*(jj[nzcountb]/bs);
738         idx = bs*bs*(a->i[i] + block) + j + bs*(point);
739 	/* printf("block row %d subrow %d cold %d block %d idx %d val %g a->i[i] %d point %d\n",i,j,k,block,idx,
740 aa[nzcountb],a->i[i],point); */
741         a->a[idx] = aa[nzcountb++];
742       }
743     }
744   }
745   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Bad new");
746 
747   PetscFree(rowlengths);
748   PetscFree(browlengths);
749   PetscFree(aa);
750   PetscFree(jj);
751   PetscFree(mask);
752 
753   B->assembled = PETSC_TRUE;
754 
755   /* MatView(*A,STDOUT_VIEWER_SELF); */
756   return 0;
757 }
758 
759 
760 
761