xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 7fc0212e64316908372d1091343c7f3bcc655bd0)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.4 1996/02/17 05:41:27 bsmith Exp bsmith $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,&ia,&ja);CHKERRQ(ierr);
41   ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
42 
43   PetscFree(ia); PetscFree(ja);
44   return 0;
45 }
46 
47 /*
48      Adds diagonal pointers to sparse matrix structure.
49 */
50 
51 int MatMarkDiag_SeqBAIJ(Mat A)
52 {
53   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
54   int         i,j, *diag, m = a->mbs;
55 
56   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
57   PLogObjectMemory(A,(m+1)*sizeof(int));
58   for ( i=0; i<m; i++ ) {
59     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
60       if (a->j[j] == i) {
61         diag[i] = j;
62         break;
63       }
64     }
65   }
66   a->diag = diag;
67   return 0;
68 }
69 
70 #include "draw.h"
71 #include "pinclude/pviewer.h"
72 #include "sysio.h"
73 
74 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
75 {
76   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
77   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l;
78   Scalar      *aa;
79 
80   ierr = ViewerFileGetDescriptor_Private(viewer,&fd); CHKERRQ(ierr);
81   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
82   col_lens[0] = MAT_COOKIE;
83   col_lens[1] = a->m;
84   col_lens[2] = a->n;
85   col_lens[3] = a->nz*bs*bs;
86 
87   /* store lengths of each row and write (including header) to file */
88   count = 0;
89   for ( i=0; i<a->mbs; i++ ) {
90     for ( j=0; j<bs; j++ ) {
91       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
92     }
93   }
94   ierr = SYWrite(fd,col_lens,4+a->m,SYINT,1); CHKERRQ(ierr);
95   PetscFree(col_lens);
96 
97   /* store column indices (zero start index) */
98   jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj);
99   count = 0;
100   for ( i=0; i<a->mbs; i++ ) {
101     for ( j=0; j<bs; j++ ) {
102       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
103         for ( l=0; l<bs; l++ ) {
104           jj[count++] = bs*a->j[k] + l;
105         }
106       }
107     }
108   }
109   ierr = SYWrite(fd,jj,bs*bs*a->nz,SYINT,0); CHKERRQ(ierr);
110   PetscFree(jj);
111 
112   /* store nonzero values */
113   aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa);
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
118         for ( l=0; l<bs; l++ ) {
119           aa[count++] = a->a[bs*bs*k + l*bs + j];
120         }
121       }
122     }
123   }
124   ierr = SYWrite(fd,aa,bs*bs*a->nz,SYSCALAR,0); CHKERRQ(ierr);
125   PetscFree(aa);
126   return 0;
127 }
128 
129 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
130 {
131   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
132   int         ierr, i,j,format,bs = a->bs,k,l;
133   FILE        *fd;
134   char        *outputname;
135 
136   ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
137   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
138   ierr = ViewerFileGetFormat_Private(viewer,&format);
139   if (format == FILE_FORMAT_INFO) {
140     /* no need to print additional information */ ;
141   }
142   else if (format == FILE_FORMAT_MATLAB) {
143     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
144   }
145   else {
146     for ( i=0; i<a->mbs; i++ ) {
147       for ( j=0; j<bs; j++ ) {
148         fprintf(fd,"row %d:",i*bs+j);
149         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
150           for ( l=0; l<bs; l++ ) {
151             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]);
152           }
153         }
154         fprintf(fd,"\n");
155       }
156     }
157   }
158   fflush(fd);
159   return 0;
160 }
161 
162 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
163 {
164   Mat         A = (Mat) obj;
165   PetscObject vobj = (PetscObject) viewer;
166 
167   if (!viewer) {
168     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
169   }
170   if (vobj->cookie == VIEWER_COOKIE) {
171     if (vobj->type == MATLAB_VIEWER) {
172       SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
173     }
174     else if (vobj->type == ASCII_FILE_VIEWER||vobj->type == ASCII_FILES_VIEWER){
175       return MatView_SeqBAIJ_ASCII(A,viewer);
176     }
177     else if (vobj->type == BINARY_FILE_VIEWER) {
178       return MatView_SeqBAIJ_Binary(A,viewer);
179     }
180   }
181   else if (vobj->cookie == DRAW_COOKIE) {
182     if (vobj->type == NULLWINDOW) return 0;
183     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
184   }
185   return 0;
186 }
187 
188 
189 static int MatZeroEntries_SeqBAIJ(Mat A)
190 {
191   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
192   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
193   return 0;
194 }
195 
196 int MatDestroy_SeqBAIJ(PetscObject obj)
197 {
198   Mat        A  = (Mat) obj;
199   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
200 
201 #if defined(PETSC_LOG)
202   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
203 #endif
204   PetscFree(a->a);
205   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
206   if (a->diag) PetscFree(a->diag);
207   if (a->ilen) PetscFree(a->ilen);
208   if (a->imax) PetscFree(a->imax);
209   if (a->solve_work) PetscFree(a->solve_work);
210   if (a->mult_work) PetscFree(a->mult_work);
211   PetscFree(a);
212   PLogObjectDestroy(A);
213   PetscHeaderDestroy(A);
214   return 0;
215 }
216 
217 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
218 {
219   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
220   if      (op == ROW_ORIENTED)              a->roworiented = 1;
221   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
222   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
223   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
224   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
225   else if (op == ROWS_SORTED ||
226            op == SYMMETRIC_MATRIX ||
227            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
228            op == YES_NEW_DIAGONALS)
229     PLogInfo((PetscObject)A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
230   else if (op == NO_NEW_DIAGONALS)
231     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
232   else
233     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
234   return 0;
235 }
236 
237 
238 /* -------------------------------------------------------*/
239 /* Should check that shapes of vectors and matrices match */
240 /* -------------------------------------------------------*/
241 #include "pinclude/plapack.h"
242 
243 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
244 {
245   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
246   Scalar          *xg,*yg;
247   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
248   register Scalar x1,x2,x3,x4,x5;
249   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
250   int             bs = a->bs,j,n,bs2 = bs*bs;
251 
252   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
253   PetscMemzero(y,m*sizeof(Scalar));
254   x     = x;
255   idx   = a->j;
256   v     = a->a;
257   ii    = a->i;
258 
259   switch (bs) {
260     case 1:
261      for ( i=0; i<m; i++ ) {
262        n    = ii[1] - ii[0]; ii++;
263        sum  = 0.0;
264        while (n--) sum += *v++ * x[*idx++];
265        y[i] = sum;
266       }
267       break;
268     case 2:
269       for ( i=0; i<mbs; i++ ) {
270         n  = ii[1] - ii[0]; ii++;
271         sum1 = 0.0; sum2 = 0.0;
272         for ( j=0; j<n; j++ ) {
273           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
274           sum1 += v[0]*x1 + v[2]*x2;
275           sum2 += v[1]*x1 + v[3]*x2;
276           v += 4;
277         }
278         y[0] += sum1; y[1] += sum2;
279         y += 2;
280       }
281       break;
282     case 3:
283       for ( i=0; i<mbs; i++ ) {
284         n  = ii[1] - ii[0]; ii++;
285         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
286         for ( j=0; j<n; j++ ) {
287           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
288           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
289           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
290           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
291           v += 9;
292         }
293         y[0] += sum1; y[1] += sum2; y[2] += sum3;
294         y += 3;
295       }
296       break;
297     case 4:
298       for ( i=0; i<mbs; i++ ) {
299         n  = ii[1] - ii[0]; ii++;
300         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
301         for ( j=0; j<n; j++ ) {
302           xb = x + 4*(*idx++);
303           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
304           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
305           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
306           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
307           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
308           v += 16;
309         }
310         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
311         y += 4;
312       }
313       break;
314     case 5:
315       for ( i=0; i<mbs; i++ ) {
316         n  = ii[1] - ii[0]; ii++;
317         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
318         for ( j=0; j<n; j++ ) {
319           xb = x + 5*(*idx++);
320           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
321           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
322           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
323           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
324           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
325           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
326           v += 25;
327         }
328         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
329         y += 5;
330       }
331       break;
332       /* block sizes larger then 5 by 5 are handled by BLAS */
333     default: {
334       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
335       if (!a->mult_work) {
336         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
337         CHKPTRQ(a->mult_work);
338       }
339       work = a->mult_work;
340       for ( i=0; i<mbs; i++ ) {
341         n     = ii[1] - ii[0]; ii++;
342         ncols = n*bs;
343         workt = work;
344         for ( j=0; j<n; j++ ) {
345           xb = x + bs*(*idx++);
346           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
347           workt += bs;
348         }
349         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
350         v += n*bs2;
351         y += bs;
352       }
353     }
354   }
355   PLogFlops(2*bs2*a->nz - m);
356   return 0;
357 }
358 
359 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
360 {
361   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
362   *nz  = a->bs*a->bs*a->nz;
363   *nza = a->maxnz;
364   *mem = (int)A->mem;
365   return 0;
366 }
367 
368 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
369 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
370 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
371 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
372 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
373 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
374 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
375 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
376 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
377 
378 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
379 {
380   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
381   *m = a->m; *n = a->n;
382   return 0;
383 }
384 
385 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
386 {
387   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
388   *m = 0; *n = a->m;
389   return 0;
390 }
391 
392 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
393 {
394   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
395   Scalar      *v = a->a;
396   double      sum = 0.0;
397   int         i;
398 
399   if (type == NORM_FROBENIUS) {
400     for (i=0; i<a->nz; i++ ) {
401 #if defined(PETSC_COMPLEX)
402       sum += real(conj(*v)*(*v)); v++;
403 #else
404       sum += (*v)*(*v); v++;
405 #endif
406     }
407     *norm = sqrt(sum);
408   }
409   else {
410     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
411   }
412   return 0;
413 }
414 
415 /*
416      note: This can only work for identity for row and col. It would
417    be good to check this and otherwise generate an error.
418 */
419 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
420 {
421   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
422   Mat         outA;
423   int         ierr;
424 
425   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
426 
427   outA          = inA;
428   inA->factor   = FACTOR_LU;
429   a->row        = row;
430   a->col        = col;
431 
432   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
433 
434   if (!a->diag) {
435     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
436   }
437   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
438   return 0;
439 }
440 
441 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
442 {
443   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
444   int         one = 1, totalnz = a->bs*a->bs*a->nz;
445   BLscal_( &totalnz, alpha, a->a, &one );
446   PLogFlops(totalnz);
447   return 0;
448 }
449 
450 int MatPrintHelp_SeqBAIJ(Mat A)
451 {
452   static int called = 0;
453 
454   if (called) return 0; else called = 1;
455   return 0;
456 }
457 /* -------------------------------------------------------------------*/
458 static struct _MatOps MatOps = {0,
459        0,0,
460        MatMult_SeqBAIJ,0,
461        0,0,
462        MatSolve_SeqBAIJ,0,
463        0,0,
464        MatLUFactor_SeqBAIJ,0,
465        0,
466        0,
467        MatGetInfo_SeqBAIJ,0,
468        0,0,MatNorm_SeqBAIJ,
469        0,0,
470        0,
471        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
472        MatGetReordering_SeqBAIJ,
473        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
474        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
475        MatILUFactorSymbolic_SeqBAIJ,0,
476        0,0,/*MatConvert_SeqBAIJ*/ 0,
477        0,0,
478        MatConvertSameType_SeqBAIJ,0,0,
479        MatILUFactor_SeqBAIJ,0,0,
480        0,0,
481        0,0,
482        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
483        0};
484 
485 /*@C
486    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
487    (the default parallel PETSc format).  For good matrix assembly performance
488    the user should preallocate the matrix storage by setting the parameter nz
489    (or nzz).  By setting these parameters accurately, performance can be
490    increased by more than a factor of 50.
491 
492    Input Parameters:
493 .  comm - MPI communicator, set to MPI_COMM_SELF
494 .  bs - size of block
495 .  m - number of rows
496 .  n - number of columns
497 .  nz - number of block nonzeros per block row (same for all rows)
498 .  nzz - number of block nonzeros per block row or PETSC_NULL
499          (possibly different for each row)
500 
501    Output Parameter:
502 .  A - the matrix
503 
504    Notes:
505    The BAIJ format, is fully compatible with standard Fortran 77
506    storage.  That is, the stored row and column indices can begin at
507    either one (as in Fortran) or zero.  See the users manual for details.
508 
509    Specify the preallocated storage with either nz or nnz (not both).
510    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
511    allocation.  For additional details, see the users manual chapter on
512    matrices and the file $(PETSC_DIR)/Performance.
513 
514 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
515 @*/
516 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
517 {
518   Mat         B;
519   Mat_SeqBAIJ *b;
520   int         i,len,ierr, flg,mbs = m/bs;
521 
522   if (mbs*bs != m)
523     SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize");
524 
525   *A      = 0;
526   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
527   PLogObjectCreate(B);
528   B->data             = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
529   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
530   switch (bs) {
531     case 1:
532       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
533     break;
534   }
535   B->destroy          = MatDestroy_SeqBAIJ;
536   B->view             = MatView_SeqBAIJ;
537   B->factor           = 0;
538   B->lupivotthreshold = 1.0;
539   b->row              = 0;
540   b->col              = 0;
541   b->reallocs         = 0;
542 
543   b->m       = m;
544   b->mbs     = mbs;
545   b->n       = n;
546   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
547   if (nnz == PETSC_NULL) {
548     if (nz == PETSC_DEFAULT) nz = 5;
549     else if (nz <= 0)        nz = 1;
550     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
551     nz = nz*mbs;
552   }
553   else {
554     nz = 0;
555     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
556   }
557 
558   /* allocate the matrix space */
559   len   = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int);
560   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
561   PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar));
562   b->j  = (int *) (b->a + nz*bs*bs);
563   PetscMemzero(b->j,nz*sizeof(int));
564   b->i  = b->j + nz;
565   b->singlemalloc = 1;
566 
567   b->i[0] = 0;
568   for (i=1; i<mbs+1; i++) {
569     b->i[i] = b->i[i-1] + b->imax[i-1];
570   }
571 
572   /* b->ilen will count nonzeros in each block row so far. */
573   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
574   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
575   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
576 
577   b->bs               = bs;
578   b->mbs              = mbs;
579   b->nz               = 0;
580   b->maxnz            = nz;
581   b->sorted           = 0;
582   b->roworiented      = 1;
583   b->nonew            = 0;
584   b->diag             = 0;
585   b->solve_work       = 0;
586   b->mult_work        = 0;
587   b->spptr            = 0;
588 
589   *A = B;
590   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
591   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
592   return 0;
593 }
594 
595 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
596 {
597   Mat         C;
598   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
599   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz;
600 
601   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
602 
603   *B = 0;
604   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
605   PLogObjectCreate(C);
606   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
607   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
608   C->destroy    = MatDestroy_SeqBAIJ;
609   C->view       = MatView_SeqBAIJ;
610   C->factor     = A->factor;
611   c->row        = 0;
612   c->col        = 0;
613   C->assembled  = PETSC_TRUE;
614 
615   c->m          = a->m;
616   c->n          = a->n;
617   c->bs         = a->bs;
618   c->mbs        = a->mbs;
619   c->nbs        = a->nbs;
620 
621   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
622   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
623   for ( i=0; i<mbs; i++ ) {
624     c->imax[i] = a->imax[i];
625     c->ilen[i] = a->ilen[i];
626   }
627 
628   /* allocate the matrix space */
629   c->singlemalloc = 1;
630   len   = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int));
631   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
632   c->j  = (int *) (c->a + nz*bs*bs);
633   c->i  = c->j + nz;
634   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
635   if (mbs > 0) {
636     PetscMemcpy(c->j,a->j,nz*sizeof(int));
637     if (cpvalues == COPY_VALUES) {
638       PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar));
639     }
640   }
641 
642   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
643   c->sorted      = a->sorted;
644   c->roworiented = a->roworiented;
645   c->nonew       = a->nonew;
646 
647   if (a->diag) {
648     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
649     PLogObjectMemory(C,(mbs+1)*sizeof(int));
650     for ( i=0; i<mbs; i++ ) {
651       c->diag[i] = a->diag[i];
652     }
653   }
654   else c->diag          = 0;
655   c->nz                 = a->nz;
656   c->maxnz              = a->maxnz;
657   c->solve_work         = 0;
658   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
659   c->mult_work          = 0;
660   *B = C;
661   return 0;
662 }
663 
664 int MatLoad_SeqBAIJ(Viewer bview,MatType type,Mat *A)
665 {
666   Mat_SeqBAIJ  *a;
667   Mat          B;
668   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
669   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
670   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
671   int          *masked, nmask,tmp,ishift,bs2;
672   Scalar       *aa;
673   PetscObject  vobj = (PetscObject) bview;
674   MPI_Comm     comm = vobj->comm;
675 
676   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
677   bs2  = bs*bs;
678 
679   MPI_Comm_size(comm,&size);
680   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
681   ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
682   ierr = SYRead(fd,header,4,SYINT); CHKERRQ(ierr);
683   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
684   M = header[1]; N = header[2]; nz = header[3];
685 
686   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
687 
688   /*
689      This code adds extra rows to make sure the number of rows is
690     divisible by the blocksize
691   */
692   mbs        = M/bs;
693   extra_rows = bs - M + bs*(mbs);
694   if (extra_rows == bs) extra_rows = 0;
695   else                  mbs++;
696   if (extra_rows) {
697     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
698   }
699 
700   /* read in row lengths */
701   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
702   ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
703   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
704 
705   /* read in column indices */
706   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
707   ierr = SYRead(fd,jj,nz,SYINT); CHKERRQ(ierr);
708   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
709 
710   /* loop over row lengths determining block row lengths */
711   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
712   PetscMemzero(browlengths,mbs*sizeof(int));
713   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
714   PetscMemzero(mask,mbs*sizeof(int));
715   masked = mask + mbs;
716   rowcount = 0; nzcount = 0;
717   for ( i=0; i<mbs; i++ ) {
718     nmask = 0;
719     for ( j=0; j<bs; j++ ) {
720       kmax = rowlengths[rowcount];
721       for ( k=0; k<kmax; k++ ) {
722         tmp = jj[nzcount++]/bs;
723         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
724       }
725       rowcount++;
726     }
727     browlengths[i] += nmask;
728     /* zero out the mask elements we set */
729     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
730   }
731 
732   /* create our matrix */
733   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
734          CHKERRQ(ierr);
735   B = *A;
736   a = (Mat_SeqBAIJ *) B->data;
737 
738   /* set matrix "i" values */
739   a->i[0] = 0;
740   for ( i=1; i<= mbs; i++ ) {
741     a->i[i]      = a->i[i-1] + browlengths[i-1];
742     a->ilen[i-1] = browlengths[i-1];
743   }
744   a->nz = 0;
745   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
746 
747   /* read in nonzero values */
748   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
749   ierr = SYRead(fd,aa,nz,SYSCALAR); CHKERRQ(ierr);
750   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
751 
752   /* set "a" and "j" values into matrix */
753   nzcount = 0; jcount = 0;
754   for ( i=0; i<mbs; i++ ) {
755     nzcountb = nzcount;
756     nmask    = 0;
757     for ( j=0; j<bs; j++ ) {
758       kmax = rowlengths[i*bs+j];
759       for ( k=0; k<kmax; k++ ) {
760         tmp = jj[nzcount++]/bs;
761 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
762       }
763       rowcount++;
764     }
765     /* sort the masked values */
766     SYIsort(nmask,masked);
767 
768     /* set "j" values into matrix */
769     maskcount = 1;
770     for ( j=0; j<nmask; j++ ) {
771       a->j[jcount++]  = masked[j];
772       mask[masked[j]] = maskcount++;
773     }
774     /* set "a" values into matrix */
775     ishift = bs2*a->i[i];
776     for ( j=0; j<bs; j++ ) {
777       kmax = rowlengths[i*bs+j];
778       for ( k=0; k<kmax; k++ ) {
779         tmp    = jj[nzcountb]/bs ;
780         block  = mask[tmp] - 1;
781         point  = jj[nzcountb] - bs*tmp;
782         idx    = ishift + bs2*block + j + bs*point;
783         a->a[idx] = aa[nzcountb++];
784       }
785     }
786     /* zero out the mask elements we set */
787     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
788   }
789   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
790 
791   PetscFree(rowlengths);
792   PetscFree(browlengths);
793   PetscFree(aa);
794   PetscFree(jj);
795   PetscFree(mask);
796 
797   B->assembled = PETSC_TRUE;
798 
799   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
800   if (flg) {
801     Viewer viewer;
802     ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr);
803     ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_INFO,0);CHKERRQ(ierr);
804     ierr = MatView(B,viewer); CHKERRQ(ierr);
805     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
806   }
807   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
808   if (flg) {
809     Viewer viewer;
810     ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr);
811     ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
812     ierr = MatView(B,viewer); CHKERRQ(ierr);
813     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
814   }
815   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
816   if (flg) {
817     Viewer viewer;
818     ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr);
819     ierr = MatView(B,viewer); CHKERRQ(ierr);
820     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
821   }
822   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
823   if (flg) {
824     Viewer viewer;
825     ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr);
826     ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_MATLAB,"M");CHKERRQ(ierr);
827     ierr = MatView(B,viewer); CHKERRQ(ierr);
828     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
829   }
830   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
831   if (flg) {
832     Draw    win;
833     ierr = DrawOpenX(B->comm,0,0,0,0,300,300,&win); CHKERRQ(ierr);
834     ierr = MatView(B,(Viewer)win); CHKERRQ(ierr);
835     ierr = DrawSyncFlush(win); CHKERRQ(ierr);
836     ierr = DrawDestroy(win); CHKERRQ(ierr);
837   }
838   return 0;
839 }
840 
841 
842 
843