xref: /petsc/src/mat/impls/baij/seq/baij.c (revision adee11b887867f1598ec5c1ac3ea74c7de6cdae3)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.5 1996/02/18 00:40:34 bsmith Exp bsmith $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,&ia,&ja);CHKERRQ(ierr);
41   ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
42 
43   PetscFree(ia); PetscFree(ja);
44   return 0;
45 }
46 
47 /*
48      Adds diagonal pointers to sparse matrix structure.
49 */
50 
51 int MatMarkDiag_SeqBAIJ(Mat A)
52 {
53   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
54   int         i,j, *diag, m = a->mbs;
55 
56   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
57   PLogObjectMemory(A,(m+1)*sizeof(int));
58   for ( i=0; i<m; i++ ) {
59     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
60       if (a->j[j] == i) {
61         diag[i] = j;
62         break;
63       }
64     }
65   }
66   a->diag = diag;
67   return 0;
68 }
69 
70 #include "draw.h"
71 #include "pinclude/pviewer.h"
72 #include "sysio.h"
73 
74 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
75 {
76   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
77   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l;
78   Scalar      *aa;
79 
80   ierr = ViewerFileGetDescriptor_Private(viewer,&fd); CHKERRQ(ierr);
81   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
82   col_lens[0] = MAT_COOKIE;
83   col_lens[1] = a->m;
84   col_lens[2] = a->n;
85   col_lens[3] = a->nz*bs*bs;
86 
87   /* store lengths of each row and write (including header) to file */
88   count = 0;
89   for ( i=0; i<a->mbs; i++ ) {
90     for ( j=0; j<bs; j++ ) {
91       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
92     }
93   }
94   ierr = SYWrite(fd,col_lens,4+a->m,SYINT,1); CHKERRQ(ierr);
95   PetscFree(col_lens);
96 
97   /* store column indices (zero start index) */
98   jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj);
99   count = 0;
100   for ( i=0; i<a->mbs; i++ ) {
101     for ( j=0; j<bs; j++ ) {
102       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
103         for ( l=0; l<bs; l++ ) {
104           jj[count++] = bs*a->j[k] + l;
105         }
106       }
107     }
108   }
109   ierr = SYWrite(fd,jj,bs*bs*a->nz,SYINT,0); CHKERRQ(ierr);
110   PetscFree(jj);
111 
112   /* store nonzero values */
113   aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa);
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
118         for ( l=0; l<bs; l++ ) {
119           aa[count++] = a->a[bs*bs*k + l*bs + j];
120         }
121       }
122     }
123   }
124   ierr = SYWrite(fd,aa,bs*bs*a->nz,SYSCALAR,0); CHKERRQ(ierr);
125   PetscFree(aa);
126   return 0;
127 }
128 
129 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
130 {
131   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
132   int         ierr, i,j,format,bs = a->bs,k,l;
133   FILE        *fd;
134   char        *outputname;
135 
136   ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
137   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
138   ierr = ViewerFileGetFormat_Private(viewer,&format);
139   if (format == FILE_FORMAT_INFO) {
140     /* no need to print additional information */ ;
141   }
142   else if (format == FILE_FORMAT_MATLAB) {
143     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
144   }
145   else {
146     for ( i=0; i<a->mbs; i++ ) {
147       for ( j=0; j<bs; j++ ) {
148         fprintf(fd,"row %d:",i*bs+j);
149         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
150           for ( l=0; l<bs; l++ ) {
151             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]);
152           }
153         }
154         fprintf(fd,"\n");
155       }
156     }
157   }
158   fflush(fd);
159   return 0;
160 }
161 
162 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
163 {
164   Mat         A = (Mat) obj;
165   PetscObject vobj = (PetscObject) viewer;
166 
167   if (!viewer) {
168     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
169   }
170   if (vobj->cookie == VIEWER_COOKIE) {
171     if (vobj->type == MATLAB_VIEWER) {
172       SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
173     }
174     else if (vobj->type == ASCII_FILE_VIEWER||vobj->type == ASCII_FILES_VIEWER){
175       return MatView_SeqBAIJ_ASCII(A,viewer);
176     }
177     else if (vobj->type == BINARY_FILE_VIEWER) {
178       return MatView_SeqBAIJ_Binary(A,viewer);
179     }
180   }
181   else if (vobj->cookie == DRAW_COOKIE) {
182     if (vobj->type == NULLWINDOW) return 0;
183     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
184   }
185   return 0;
186 }
187 
188 
189 static int MatZeroEntries_SeqBAIJ(Mat A)
190 {
191   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
192   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
193   return 0;
194 }
195 
196 int MatDestroy_SeqBAIJ(PetscObject obj)
197 {
198   Mat        A  = (Mat) obj;
199   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
200 
201 #if defined(PETSC_LOG)
202   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
203 #endif
204   PetscFree(a->a);
205   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
206   if (a->diag) PetscFree(a->diag);
207   if (a->ilen) PetscFree(a->ilen);
208   if (a->imax) PetscFree(a->imax);
209   if (a->solve_work) PetscFree(a->solve_work);
210   if (a->mult_work) PetscFree(a->mult_work);
211   PetscFree(a);
212   PLogObjectDestroy(A);
213   PetscHeaderDestroy(A);
214   return 0;
215 }
216 
217 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
218 {
219   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
220   if      (op == ROW_ORIENTED)              a->roworiented = 1;
221   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
222   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
223   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
224   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
225   else if (op == ROWS_SORTED ||
226            op == SYMMETRIC_MATRIX ||
227            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
228            op == YES_NEW_DIAGONALS)
229     PLogInfo((PetscObject)A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
230   else if (op == NO_NEW_DIAGONALS)
231     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
232   else
233     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
234   return 0;
235 }
236 
237 
238 /* -------------------------------------------------------*/
239 /* Should check that shapes of vectors and matrices match */
240 /* -------------------------------------------------------*/
241 #include "pinclude/plapack.h"
242 
243 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
244 {
245   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
246   Scalar          *xg,*yg;
247   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
248   register Scalar x1,x2,x3,x4,x5;
249   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
250   int             bs = a->bs,j,n,bs2 = bs*bs;
251 
252   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
253   PetscMemzero(y,m*sizeof(Scalar));
254   x     = x;
255   idx   = a->j;
256   v     = a->a;
257   ii    = a->i;
258 
259   switch (bs) {
260     case 1:
261      for ( i=0; i<m; i++ ) {
262        n    = ii[1] - ii[0]; ii++;
263        sum  = 0.0;
264        while (n--) sum += *v++ * x[*idx++];
265        y[i] = sum;
266       }
267       break;
268     case 2:
269       for ( i=0; i<mbs; i++ ) {
270         n  = ii[1] - ii[0]; ii++;
271         sum1 = 0.0; sum2 = 0.0;
272         for ( j=0; j<n; j++ ) {
273           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
274           sum1 += v[0]*x1 + v[2]*x2;
275           sum2 += v[1]*x1 + v[3]*x2;
276           v += 4;
277         }
278         y[0] += sum1; y[1] += sum2;
279         y += 2;
280       }
281       break;
282     case 3:
283       for ( i=0; i<mbs; i++ ) {
284         n  = ii[1] - ii[0]; ii++;
285         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
286         for ( j=0; j<n; j++ ) {
287           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
288           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
289           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
290           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
291           v += 9;
292         }
293         y[0] += sum1; y[1] += sum2; y[2] += sum3;
294         y += 3;
295       }
296       break;
297     case 4:
298       for ( i=0; i<mbs; i++ ) {
299         n  = ii[1] - ii[0]; ii++;
300         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
301         for ( j=0; j<n; j++ ) {
302           xb = x + 4*(*idx++);
303           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
304           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
305           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
306           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
307           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
308           v += 16;
309         }
310         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
311         y += 4;
312       }
313       break;
314     case 5:
315       for ( i=0; i<mbs; i++ ) {
316         n  = ii[1] - ii[0]; ii++;
317         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
318         for ( j=0; j<n; j++ ) {
319           xb = x + 5*(*idx++);
320           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
321           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
322           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
323           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
324           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
325           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
326           v += 25;
327         }
328         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
329         y += 5;
330       }
331       break;
332       /* block sizes larger then 5 by 5 are handled by BLAS */
333     default: {
334       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
335       if (!a->mult_work) {
336         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
337         CHKPTRQ(a->mult_work);
338       }
339       work = a->mult_work;
340       for ( i=0; i<mbs; i++ ) {
341         n     = ii[1] - ii[0]; ii++;
342         ncols = n*bs;
343         workt = work;
344         for ( j=0; j<n; j++ ) {
345           xb = x + bs*(*idx++);
346           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
347           workt += bs;
348         }
349         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
350         v += n*bs2;
351         y += bs;
352       }
353     }
354   }
355   PLogFlops(2*bs2*a->nz - m);
356   return 0;
357 }
358 
359 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
360 {
361   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
362   *nz  = a->bs*a->bs*a->nz;
363   *nza = a->maxnz;
364   *mem = (int)A->mem;
365   return 0;
366 }
367 
368 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
369 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
370 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
371 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
372 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
373 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
374 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
375 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
376 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
377 
378 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
379 {
380   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
381   *m = a->m; *n = a->n;
382   return 0;
383 }
384 
385 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
386 {
387   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
388   *m = 0; *n = a->m;
389   return 0;
390 }
391 
392 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
393 {
394   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
395   Scalar      *v = a->a;
396   double      sum = 0.0;
397   int         i;
398 
399   if (type == NORM_FROBENIUS) {
400     for (i=0; i<a->nz; i++ ) {
401 #if defined(PETSC_COMPLEX)
402       sum += real(conj(*v)*(*v)); v++;
403 #else
404       sum += (*v)*(*v); v++;
405 #endif
406     }
407     *norm = sqrt(sum);
408   }
409   else {
410     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
411   }
412   return 0;
413 }
414 
415 /*
416      note: This can only work for identity for row and col. It would
417    be good to check this and otherwise generate an error.
418 */
419 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
420 {
421   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
422   Mat         outA;
423   int         ierr;
424 
425   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
426 
427   outA          = inA;
428   inA->factor   = FACTOR_LU;
429   a->row        = row;
430   a->col        = col;
431 
432   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
433 
434   if (!a->diag) {
435     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
436   }
437   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
438   return 0;
439 }
440 
441 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
442 {
443   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
444   int         one = 1, totalnz = a->bs*a->bs*a->nz;
445   BLscal_( &totalnz, alpha, a->a, &one );
446   PLogFlops(totalnz);
447   return 0;
448 }
449 
450 int MatPrintHelp_SeqBAIJ(Mat A)
451 {
452   static int called = 0;
453 
454   if (called) return 0; else called = 1;
455   return 0;
456 }
457 /* -------------------------------------------------------------------*/
458 static struct _MatOps MatOps = {0,
459        0,0,
460        MatMult_SeqBAIJ,0,
461        0,0,
462        MatSolve_SeqBAIJ,0,
463        0,0,
464        MatLUFactor_SeqBAIJ,0,
465        0,
466        0,
467        MatGetInfo_SeqBAIJ,0,
468        0,0,MatNorm_SeqBAIJ,
469        0,0,
470        0,
471        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
472        MatGetReordering_SeqBAIJ,
473        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
474        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
475        MatILUFactorSymbolic_SeqBAIJ,0,
476        0,0,/*MatConvert_SeqBAIJ*/ 0,
477        0,0,
478        MatConvertSameType_SeqBAIJ,0,0,
479        MatILUFactor_SeqBAIJ,0,0,
480        0,0,
481        0,0,
482        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
483        0};
484 
485 /*@C
486    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
487    (the default parallel PETSc format).  For good matrix assembly performance
488    the user should preallocate the matrix storage by setting the parameter nz
489    (or nzz).  By setting these parameters accurately, performance can be
490    increased by more than a factor of 50.
491 
492    Input Parameters:
493 .  comm - MPI communicator, set to MPI_COMM_SELF
494 .  bs - size of block
495 .  m - number of rows
496 .  n - number of columns
497 .  nz - number of block nonzeros per block row (same for all rows)
498 .  nzz - number of block nonzeros per block row or PETSC_NULL
499          (possibly different for each row)
500 
501    Output Parameter:
502 .  A - the matrix
503 
504    Notes:
505    The BAIJ format, is fully compatible with standard Fortran 77
506    storage.  That is, the stored row and column indices can begin at
507    either one (as in Fortran) or zero.  See the users manual for details.
508 
509    Specify the preallocated storage with either nz or nnz (not both).
510    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
511    allocation.  For additional details, see the users manual chapter on
512    matrices and the file $(PETSC_DIR)/Performance.
513 
514 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
515 @*/
516 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
517 {
518   Mat         B;
519   Mat_SeqBAIJ *b;
520   int         i,len,ierr, flg,mbs = m/bs;
521 
522   if (mbs*bs != m)
523     SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize");
524 
525   *A      = 0;
526   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
527   PLogObjectCreate(B);
528   B->data             = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
529   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
530   switch (bs) {
531     case 1:
532       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
533       break;
534     case 2:
535       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
536     break;
537   }
538   B->destroy          = MatDestroy_SeqBAIJ;
539   B->view             = MatView_SeqBAIJ;
540   B->factor           = 0;
541   B->lupivotthreshold = 1.0;
542   b->row              = 0;
543   b->col              = 0;
544   b->reallocs         = 0;
545 
546   b->m       = m;
547   b->mbs     = mbs;
548   b->n       = n;
549   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
550   if (nnz == PETSC_NULL) {
551     if (nz == PETSC_DEFAULT) nz = 5;
552     else if (nz <= 0)        nz = 1;
553     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
554     nz = nz*mbs;
555   }
556   else {
557     nz = 0;
558     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
559   }
560 
561   /* allocate the matrix space */
562   len   = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int);
563   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
564   PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar));
565   b->j  = (int *) (b->a + nz*bs*bs);
566   PetscMemzero(b->j,nz*sizeof(int));
567   b->i  = b->j + nz;
568   b->singlemalloc = 1;
569 
570   b->i[0] = 0;
571   for (i=1; i<mbs+1; i++) {
572     b->i[i] = b->i[i-1] + b->imax[i-1];
573   }
574 
575   /* b->ilen will count nonzeros in each block row so far. */
576   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
577   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
578   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
579 
580   b->bs               = bs;
581   b->mbs              = mbs;
582   b->nz               = 0;
583   b->maxnz            = nz;
584   b->sorted           = 0;
585   b->roworiented      = 1;
586   b->nonew            = 0;
587   b->diag             = 0;
588   b->solve_work       = 0;
589   b->mult_work        = 0;
590   b->spptr            = 0;
591 
592   *A = B;
593   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
594   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
595   return 0;
596 }
597 
598 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
599 {
600   Mat         C;
601   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
602   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz;
603 
604   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
605 
606   *B = 0;
607   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
608   PLogObjectCreate(C);
609   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
610   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
611   C->destroy    = MatDestroy_SeqBAIJ;
612   C->view       = MatView_SeqBAIJ;
613   C->factor     = A->factor;
614   c->row        = 0;
615   c->col        = 0;
616   C->assembled  = PETSC_TRUE;
617 
618   c->m          = a->m;
619   c->n          = a->n;
620   c->bs         = a->bs;
621   c->mbs        = a->mbs;
622   c->nbs        = a->nbs;
623 
624   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
625   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
626   for ( i=0; i<mbs; i++ ) {
627     c->imax[i] = a->imax[i];
628     c->ilen[i] = a->ilen[i];
629   }
630 
631   /* allocate the matrix space */
632   c->singlemalloc = 1;
633   len   = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int));
634   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
635   c->j  = (int *) (c->a + nz*bs*bs);
636   c->i  = c->j + nz;
637   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
638   if (mbs > 0) {
639     PetscMemcpy(c->j,a->j,nz*sizeof(int));
640     if (cpvalues == COPY_VALUES) {
641       PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar));
642     }
643   }
644 
645   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
646   c->sorted      = a->sorted;
647   c->roworiented = a->roworiented;
648   c->nonew       = a->nonew;
649 
650   if (a->diag) {
651     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
652     PLogObjectMemory(C,(mbs+1)*sizeof(int));
653     for ( i=0; i<mbs; i++ ) {
654       c->diag[i] = a->diag[i];
655     }
656   }
657   else c->diag          = 0;
658   c->nz                 = a->nz;
659   c->maxnz              = a->maxnz;
660   c->solve_work         = 0;
661   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
662   c->mult_work          = 0;
663   *B = C;
664   return 0;
665 }
666 
667 int MatLoad_SeqBAIJ(Viewer bview,MatType type,Mat *A)
668 {
669   Mat_SeqBAIJ  *a;
670   Mat          B;
671   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
672   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
673   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
674   int          *masked, nmask,tmp,ishift,bs2;
675   Scalar       *aa;
676   PetscObject  vobj = (PetscObject) bview;
677   MPI_Comm     comm = vobj->comm;
678 
679   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
680   bs2  = bs*bs;
681 
682   MPI_Comm_size(comm,&size);
683   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
684   ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
685   ierr = SYRead(fd,header,4,SYINT); CHKERRQ(ierr);
686   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
687   M = header[1]; N = header[2]; nz = header[3];
688 
689   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
690 
691   /*
692      This code adds extra rows to make sure the number of rows is
693     divisible by the blocksize
694   */
695   mbs        = M/bs;
696   extra_rows = bs - M + bs*(mbs);
697   if (extra_rows == bs) extra_rows = 0;
698   else                  mbs++;
699   if (extra_rows) {
700     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
701   }
702 
703   /* read in row lengths */
704   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
705   ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
706   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
707 
708   /* read in column indices */
709   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
710   ierr = SYRead(fd,jj,nz,SYINT); CHKERRQ(ierr);
711   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
712 
713   /* loop over row lengths determining block row lengths */
714   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
715   PetscMemzero(browlengths,mbs*sizeof(int));
716   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
717   PetscMemzero(mask,mbs*sizeof(int));
718   masked = mask + mbs;
719   rowcount = 0; nzcount = 0;
720   for ( i=0; i<mbs; i++ ) {
721     nmask = 0;
722     for ( j=0; j<bs; j++ ) {
723       kmax = rowlengths[rowcount];
724       for ( k=0; k<kmax; k++ ) {
725         tmp = jj[nzcount++]/bs;
726         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
727       }
728       rowcount++;
729     }
730     browlengths[i] += nmask;
731     /* zero out the mask elements we set */
732     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
733   }
734 
735   /* create our matrix */
736   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
737          CHKERRQ(ierr);
738   B = *A;
739   a = (Mat_SeqBAIJ *) B->data;
740 
741   /* set matrix "i" values */
742   a->i[0] = 0;
743   for ( i=1; i<= mbs; i++ ) {
744     a->i[i]      = a->i[i-1] + browlengths[i-1];
745     a->ilen[i-1] = browlengths[i-1];
746   }
747   a->nz = 0;
748   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
749 
750   /* read in nonzero values */
751   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
752   ierr = SYRead(fd,aa,nz,SYSCALAR); CHKERRQ(ierr);
753   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
754 
755   /* set "a" and "j" values into matrix */
756   nzcount = 0; jcount = 0;
757   for ( i=0; i<mbs; i++ ) {
758     nzcountb = nzcount;
759     nmask    = 0;
760     for ( j=0; j<bs; j++ ) {
761       kmax = rowlengths[i*bs+j];
762       for ( k=0; k<kmax; k++ ) {
763         tmp = jj[nzcount++]/bs;
764 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
765       }
766       rowcount++;
767     }
768     /* sort the masked values */
769     SYIsort(nmask,masked);
770 
771     /* set "j" values into matrix */
772     maskcount = 1;
773     for ( j=0; j<nmask; j++ ) {
774       a->j[jcount++]  = masked[j];
775       mask[masked[j]] = maskcount++;
776     }
777     /* set "a" values into matrix */
778     ishift = bs2*a->i[i];
779     for ( j=0; j<bs; j++ ) {
780       kmax = rowlengths[i*bs+j];
781       for ( k=0; k<kmax; k++ ) {
782         tmp    = jj[nzcountb]/bs ;
783         block  = mask[tmp] - 1;
784         point  = jj[nzcountb] - bs*tmp;
785         idx    = ishift + bs2*block + j + bs*point;
786         a->a[idx] = aa[nzcountb++];
787       }
788     }
789     /* zero out the mask elements we set */
790     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
791   }
792   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
793 
794   PetscFree(rowlengths);
795   PetscFree(browlengths);
796   PetscFree(aa);
797   PetscFree(jj);
798   PetscFree(mask);
799 
800   B->assembled = PETSC_TRUE;
801 
802   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
803   if (flg) {
804     Viewer viewer;
805     ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr);
806     ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_INFO,0);CHKERRQ(ierr);
807     ierr = MatView(B,viewer); CHKERRQ(ierr);
808     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
809   }
810   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
811   if (flg) {
812     Viewer viewer;
813     ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr);
814     ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
815     ierr = MatView(B,viewer); CHKERRQ(ierr);
816     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
817   }
818   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
819   if (flg) {
820     Viewer viewer;
821     ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr);
822     ierr = MatView(B,viewer); CHKERRQ(ierr);
823     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
824   }
825   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
826   if (flg) {
827     Viewer viewer;
828     ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr);
829     ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_MATLAB,"M");CHKERRQ(ierr);
830     ierr = MatView(B,viewer); CHKERRQ(ierr);
831     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
832   }
833   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
834   if (flg) {
835     Draw    win;
836     ierr = DrawOpenX(B->comm,0,0,0,0,300,300,&win); CHKERRQ(ierr);
837     ierr = MatView(B,(Viewer)win); CHKERRQ(ierr);
838     ierr = DrawSyncFlush(win); CHKERRQ(ierr);
839     ierr = DrawDestroy(win); CHKERRQ(ierr);
840   }
841   return 0;
842 }
843 
844 
845 
846