xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 6d84be18fbb99ba69be7b8bdde5411a66955b7ea)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.8 1996/02/23 22:03:48 bsmith Exp bsmith $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,&ia,&ja);CHKERRQ(ierr);
41   ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
42 
43   PetscFree(ia); PetscFree(ja);
44   return 0;
45 }
46 
47 /*
48      Adds diagonal pointers to sparse matrix structure.
49 */
50 
51 int MatMarkDiag_SeqBAIJ(Mat A)
52 {
53   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
54   int         i,j, *diag, m = a->mbs;
55 
56   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
57   PLogObjectMemory(A,(m+1)*sizeof(int));
58   for ( i=0; i<m; i++ ) {
59     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
60       if (a->j[j] == i) {
61         diag[i] = j;
62         break;
63       }
64     }
65   }
66   a->diag = diag;
67   return 0;
68 }
69 
70 #include "draw.h"
71 #include "pinclude/pviewer.h"
72 #include "sysio.h"
73 
74 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
75 {
76   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
77   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l;
78   Scalar      *aa;
79 
80   ierr = ViewerFileGetDescriptor_Private(viewer,&fd); CHKERRQ(ierr);
81   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
82   col_lens[0] = MAT_COOKIE;
83   col_lens[1] = a->m;
84   col_lens[2] = a->n;
85   col_lens[3] = a->nz*bs*bs;
86 
87   /* store lengths of each row and write (including header) to file */
88   count = 0;
89   for ( i=0; i<a->mbs; i++ ) {
90     for ( j=0; j<bs; j++ ) {
91       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
92     }
93   }
94   ierr = SYWrite(fd,col_lens,4+a->m,SYINT,1); CHKERRQ(ierr);
95   PetscFree(col_lens);
96 
97   /* store column indices (zero start index) */
98   jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj);
99   count = 0;
100   for ( i=0; i<a->mbs; i++ ) {
101     for ( j=0; j<bs; j++ ) {
102       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
103         for ( l=0; l<bs; l++ ) {
104           jj[count++] = bs*a->j[k] + l;
105         }
106       }
107     }
108   }
109   ierr = SYWrite(fd,jj,bs*bs*a->nz,SYINT,0); CHKERRQ(ierr);
110   PetscFree(jj);
111 
112   /* store nonzero values */
113   aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa);
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
118         for ( l=0; l<bs; l++ ) {
119           aa[count++] = a->a[bs*bs*k + l*bs + j];
120         }
121       }
122     }
123   }
124   ierr = SYWrite(fd,aa,bs*bs*a->nz,SYSCALAR,0); CHKERRQ(ierr);
125   PetscFree(aa);
126   return 0;
127 }
128 
129 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
130 {
131   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
132   int         ierr, i,j,format,bs = a->bs,k,l;
133   FILE        *fd;
134   char        *outputname;
135 
136   ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
137   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
138   ierr = ViewerFileGetFormat_Private(viewer,&format);
139   if (format == FILE_FORMAT_INFO) {
140     /* no need to print additional information */ ;
141   }
142   else if (format == FILE_FORMAT_MATLAB) {
143     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
144   }
145   else {
146     for ( i=0; i<a->mbs; i++ ) {
147       for ( j=0; j<bs; j++ ) {
148         fprintf(fd,"row %d:",i*bs+j);
149         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
150           for ( l=0; l<bs; l++ ) {
151 #if defined(PETSC_COMPLEX)
152           if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) {
153             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
154               real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j]));
155           }
156           else {
157             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j]));
158           }
159 #else
160             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]);
161 #endif
162           }
163         }
164         fprintf(fd,"\n");
165       }
166     }
167   }
168   fflush(fd);
169   return 0;
170 }
171 
172 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
173 {
174   Mat         A = (Mat) obj;
175   PetscObject vobj = (PetscObject) viewer;
176 
177   if (!viewer) {
178     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
179   }
180   if (vobj->cookie == VIEWER_COOKIE) {
181     if (vobj->type == MATLAB_VIEWER) {
182       SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
183     }
184     else if (vobj->type == ASCII_FILE_VIEWER||vobj->type == ASCII_FILES_VIEWER){
185       return MatView_SeqBAIJ_ASCII(A,viewer);
186     }
187     else if (vobj->type == BINARY_FILE_VIEWER) {
188       return MatView_SeqBAIJ_Binary(A,viewer);
189     }
190   }
191   else if (vobj->cookie == DRAW_COOKIE) {
192     if (vobj->type == NULLWINDOW) return 0;
193     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
194   }
195   return 0;
196 }
197 
198 
199 static int MatZeroEntries_SeqBAIJ(Mat A)
200 {
201   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
202   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
203   return 0;
204 }
205 
206 int MatDestroy_SeqBAIJ(PetscObject obj)
207 {
208   Mat        A  = (Mat) obj;
209   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
210 
211 #if defined(PETSC_LOG)
212   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
213 #endif
214   PetscFree(a->a);
215   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
216   if (a->diag) PetscFree(a->diag);
217   if (a->ilen) PetscFree(a->ilen);
218   if (a->imax) PetscFree(a->imax);
219   if (a->solve_work) PetscFree(a->solve_work);
220   if (a->mult_work) PetscFree(a->mult_work);
221   PetscFree(a);
222   PLogObjectDestroy(A);
223   PetscHeaderDestroy(A);
224   return 0;
225 }
226 
227 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
228 {
229   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
230   if      (op == ROW_ORIENTED)              a->roworiented = 1;
231   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
232   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
233   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
234   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
235   else if (op == ROWS_SORTED ||
236            op == SYMMETRIC_MATRIX ||
237            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
238            op == YES_NEW_DIAGONALS)
239     PLogInfo((PetscObject)A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
240   else if (op == NO_NEW_DIAGONALS)
241     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
242   else
243     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
244   return 0;
245 }
246 
247 
248 /* -------------------------------------------------------*/
249 /* Should check that shapes of vectors and matrices match */
250 /* -------------------------------------------------------*/
251 #include "pinclude/plapack.h"
252 
253 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
254 {
255   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
256   Scalar          *xg,*yg;
257   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
258   register Scalar x1,x2,x3,x4,x5;
259   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
260   int             bs = a->bs,j,n,bs2 = bs*bs;
261 
262   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
263   PetscMemzero(y,m*sizeof(Scalar));
264   x     = x;
265   idx   = a->j;
266   v     = a->a;
267   ii    = a->i;
268 
269   switch (bs) {
270     case 1:
271      for ( i=0; i<m; i++ ) {
272        n    = ii[1] - ii[0]; ii++;
273        sum  = 0.0;
274        while (n--) sum += *v++ * x[*idx++];
275        y[i] = sum;
276       }
277       break;
278     case 2:
279       for ( i=0; i<mbs; i++ ) {
280         n  = ii[1] - ii[0]; ii++;
281         sum1 = 0.0; sum2 = 0.0;
282         for ( j=0; j<n; j++ ) {
283           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
284           sum1 += v[0]*x1 + v[2]*x2;
285           sum2 += v[1]*x1 + v[3]*x2;
286           v += 4;
287         }
288         y[0] += sum1; y[1] += sum2;
289         y += 2;
290       }
291       break;
292     case 3:
293       for ( i=0; i<mbs; i++ ) {
294         n  = ii[1] - ii[0]; ii++;
295         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
296         for ( j=0; j<n; j++ ) {
297           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
298           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
299           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
300           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
301           v += 9;
302         }
303         y[0] += sum1; y[1] += sum2; y[2] += sum3;
304         y += 3;
305       }
306       break;
307     case 4:
308       for ( i=0; i<mbs; i++ ) {
309         n  = ii[1] - ii[0]; ii++;
310         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
311         for ( j=0; j<n; j++ ) {
312           xb = x + 4*(*idx++);
313           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
314           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
315           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
316           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
317           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
318           v += 16;
319         }
320         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
321         y += 4;
322       }
323       break;
324     case 5:
325       for ( i=0; i<mbs; i++ ) {
326         n  = ii[1] - ii[0]; ii++;
327         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
328         for ( j=0; j<n; j++ ) {
329           xb = x + 5*(*idx++);
330           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
331           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
332           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
333           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
334           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
335           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
336           v += 25;
337         }
338         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
339         y += 5;
340       }
341       break;
342       /* block sizes larger then 5 by 5 are handled by BLAS */
343     default: {
344       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
345       if (!a->mult_work) {
346         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
347         CHKPTRQ(a->mult_work);
348       }
349       work = a->mult_work;
350       for ( i=0; i<mbs; i++ ) {
351         n     = ii[1] - ii[0]; ii++;
352         ncols = n*bs;
353         workt = work;
354         for ( j=0; j<n; j++ ) {
355           xb = x + bs*(*idx++);
356           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
357           workt += bs;
358         }
359         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
360         v += n*bs2;
361         y += bs;
362       }
363     }
364   }
365   PLogFlops(2*bs2*a->nz - m);
366   return 0;
367 }
368 
369 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
370 {
371   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
372   *nz  = a->bs*a->bs*a->nz;
373   *nza = a->maxnz;
374   *mem = (int)A->mem;
375   return 0;
376 }
377 
378 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
379 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
380 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
381 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
382 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
383 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
384 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
385 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
386 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
387 
388 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
389 {
390   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
391   *m = a->m; *n = a->n;
392   return 0;
393 }
394 
395 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
396 {
397   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
398   *m = 0; *n = a->m;
399   return 0;
400 }
401 
402 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
403 {
404   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
405   Scalar      *v = a->a;
406   double      sum = 0.0;
407   int         i;
408 
409   if (type == NORM_FROBENIUS) {
410     for (i=0; i<a->nz; i++ ) {
411 #if defined(PETSC_COMPLEX)
412       sum += real(conj(*v)*(*v)); v++;
413 #else
414       sum += (*v)*(*v); v++;
415 #endif
416     }
417     *norm = sqrt(sum);
418   }
419   else {
420     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
421   }
422   return 0;
423 }
424 
425 /*
426      note: This can only work for identity for row and col. It would
427    be good to check this and otherwise generate an error.
428 */
429 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
430 {
431   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
432   Mat         outA;
433   int         ierr;
434 
435   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
436 
437   outA          = inA;
438   inA->factor   = FACTOR_LU;
439   a->row        = row;
440   a->col        = col;
441 
442   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
443 
444   if (!a->diag) {
445     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
446   }
447   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
448   return 0;
449 }
450 
451 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
452 {
453   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
454   int         one = 1, totalnz = a->bs*a->bs*a->nz;
455   BLscal_( &totalnz, alpha, a->a, &one );
456   PLogFlops(totalnz);
457   return 0;
458 }
459 
460 int MatPrintHelp_SeqBAIJ(Mat A)
461 {
462   static int called = 0;
463 
464   if (called) return 0; else called = 1;
465   return 0;
466 }
467 /* -------------------------------------------------------------------*/
468 static struct _MatOps MatOps = {0,
469        0,0,
470        MatMult_SeqBAIJ,0,
471        0,0,
472        MatSolve_SeqBAIJ,0,
473        0,0,
474        MatLUFactor_SeqBAIJ,0,
475        0,
476        0,
477        MatGetInfo_SeqBAIJ,0,
478        0,0,MatNorm_SeqBAIJ,
479        0,0,
480        0,
481        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
482        MatGetReordering_SeqBAIJ,
483        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
484        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
485        MatILUFactorSymbolic_SeqBAIJ,0,
486        0,0,/*MatConvert_SeqBAIJ*/ 0,
487        0,0,
488        MatConvertSameType_SeqBAIJ,0,0,
489        MatILUFactor_SeqBAIJ,0,0,
490        0,0,
491        0,0,
492        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
493        0};
494 
495 /*@C
496    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
497    (the default parallel PETSc format).  For good matrix assembly performance
498    the user should preallocate the matrix storage by setting the parameter nz
499    (or nzz).  By setting these parameters accurately, performance can be
500    increased by more than a factor of 50.
501 
502    Input Parameters:
503 .  comm - MPI communicator, set to MPI_COMM_SELF
504 .  bs - size of block
505 .  m - number of rows
506 .  n - number of columns
507 .  nz - number of block nonzeros per block row (same for all rows)
508 .  nzz - number of block nonzeros per block row or PETSC_NULL
509          (possibly different for each row)
510 
511    Output Parameter:
512 .  A - the matrix
513 
514    Notes:
515    The BAIJ format, is fully compatible with standard Fortran 77
516    storage.  That is, the stored row and column indices can begin at
517    either one (as in Fortran) or zero.  See the users manual for details.
518 
519    Specify the preallocated storage with either nz or nnz (not both).
520    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
521    allocation.  For additional details, see the users manual chapter on
522    matrices and the file $(PETSC_DIR)/Performance.
523 
524 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
525 @*/
526 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
527 {
528   Mat         B;
529   Mat_SeqBAIJ *b;
530   int         i,len,ierr, flg,mbs = m/bs;
531 
532   if (mbs*bs != m)
533     SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize");
534 
535   *A      = 0;
536   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
537   PLogObjectCreate(B);
538   B->data             = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
539   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
540   switch (bs) {
541     case 1:
542       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
543       break;
544     case 2:
545       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
546       break;
547     case 3:
548       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
549       break;
550     case 4:
551       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
552       break;
553     case 5:
554       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
555       break;
556   }
557   B->destroy          = MatDestroy_SeqBAIJ;
558   B->view             = MatView_SeqBAIJ;
559   B->factor           = 0;
560   B->lupivotthreshold = 1.0;
561   b->row              = 0;
562   b->col              = 0;
563   b->reallocs         = 0;
564 
565   b->m       = m;
566   b->mbs     = mbs;
567   b->n       = n;
568   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
569   if (nnz == PETSC_NULL) {
570     if (nz == PETSC_DEFAULT) nz = 5;
571     else if (nz <= 0)        nz = 1;
572     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
573     nz = nz*mbs;
574   }
575   else {
576     nz = 0;
577     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
578   }
579 
580   /* allocate the matrix space */
581   len   = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int);
582   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
583   PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar));
584   b->j  = (int *) (b->a + nz*bs*bs);
585   PetscMemzero(b->j,nz*sizeof(int));
586   b->i  = b->j + nz;
587   b->singlemalloc = 1;
588 
589   b->i[0] = 0;
590   for (i=1; i<mbs+1; i++) {
591     b->i[i] = b->i[i-1] + b->imax[i-1];
592   }
593 
594   /* b->ilen will count nonzeros in each block row so far. */
595   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
596   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
597   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
598 
599   b->bs               = bs;
600   b->mbs              = mbs;
601   b->nz               = 0;
602   b->maxnz            = nz;
603   b->sorted           = 0;
604   b->roworiented      = 1;
605   b->nonew            = 0;
606   b->diag             = 0;
607   b->solve_work       = 0;
608   b->mult_work        = 0;
609   b->spptr            = 0;
610 
611   *A = B;
612   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
613   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
614   return 0;
615 }
616 
617 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
618 {
619   Mat         C;
620   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
621   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz;
622 
623   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
624 
625   *B = 0;
626   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
627   PLogObjectCreate(C);
628   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
629   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
630   C->destroy    = MatDestroy_SeqBAIJ;
631   C->view       = MatView_SeqBAIJ;
632   C->factor     = A->factor;
633   c->row        = 0;
634   c->col        = 0;
635   C->assembled  = PETSC_TRUE;
636 
637   c->m          = a->m;
638   c->n          = a->n;
639   c->bs         = a->bs;
640   c->mbs        = a->mbs;
641   c->nbs        = a->nbs;
642 
643   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
644   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
645   for ( i=0; i<mbs; i++ ) {
646     c->imax[i] = a->imax[i];
647     c->ilen[i] = a->ilen[i];
648   }
649 
650   /* allocate the matrix space */
651   c->singlemalloc = 1;
652   len   = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int));
653   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
654   c->j  = (int *) (c->a + nz*bs*bs);
655   c->i  = c->j + nz;
656   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
657   if (mbs > 0) {
658     PetscMemcpy(c->j,a->j,nz*sizeof(int));
659     if (cpvalues == COPY_VALUES) {
660       PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar));
661     }
662   }
663 
664   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
665   c->sorted      = a->sorted;
666   c->roworiented = a->roworiented;
667   c->nonew       = a->nonew;
668 
669   if (a->diag) {
670     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
671     PLogObjectMemory(C,(mbs+1)*sizeof(int));
672     for ( i=0; i<mbs; i++ ) {
673       c->diag[i] = a->diag[i];
674     }
675   }
676   else c->diag          = 0;
677   c->nz                 = a->nz;
678   c->maxnz              = a->maxnz;
679   c->solve_work         = 0;
680   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
681   c->mult_work          = 0;
682   *B = C;
683   return 0;
684 }
685 
686 int MatLoad_SeqBAIJ(Viewer bview,MatType type,Mat *A)
687 {
688   Mat_SeqBAIJ  *a;
689   Mat          B;
690   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
691   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
692   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
693   int          *masked, nmask,tmp,ishift,bs2;
694   Scalar       *aa;
695   PetscObject  vobj = (PetscObject) bview;
696   MPI_Comm     comm = vobj->comm;
697 
698   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
699   bs2  = bs*bs;
700 
701   MPI_Comm_size(comm,&size);
702   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
703   ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
704   ierr = SYRead(fd,header,4,SYINT); CHKERRQ(ierr);
705   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
706   M = header[1]; N = header[2]; nz = header[3];
707 
708   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
709 
710   /*
711      This code adds extra rows to make sure the number of rows is
712     divisible by the blocksize
713   */
714   mbs        = M/bs;
715   extra_rows = bs - M + bs*(mbs);
716   if (extra_rows == bs) extra_rows = 0;
717   else                  mbs++;
718   if (extra_rows) {
719     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
720   }
721 
722   /* read in row lengths */
723   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
724   ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
725   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
726 
727   /* read in column indices */
728   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
729   ierr = SYRead(fd,jj,nz,SYINT); CHKERRQ(ierr);
730   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
731 
732   /* loop over row lengths determining block row lengths */
733   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
734   PetscMemzero(browlengths,mbs*sizeof(int));
735   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
736   PetscMemzero(mask,mbs*sizeof(int));
737   masked = mask + mbs;
738   rowcount = 0; nzcount = 0;
739   for ( i=0; i<mbs; i++ ) {
740     nmask = 0;
741     for ( j=0; j<bs; j++ ) {
742       kmax = rowlengths[rowcount];
743       for ( k=0; k<kmax; k++ ) {
744         tmp = jj[nzcount++]/bs;
745         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
746       }
747       rowcount++;
748     }
749     browlengths[i] += nmask;
750     /* zero out the mask elements we set */
751     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
752   }
753 
754   /* create our matrix */
755   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
756          CHKERRQ(ierr);
757   B = *A;
758   a = (Mat_SeqBAIJ *) B->data;
759 
760   /* set matrix "i" values */
761   a->i[0] = 0;
762   for ( i=1; i<= mbs; i++ ) {
763     a->i[i]      = a->i[i-1] + browlengths[i-1];
764     a->ilen[i-1] = browlengths[i-1];
765   }
766   a->nz = 0;
767   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
768 
769   /* read in nonzero values */
770   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
771   ierr = SYRead(fd,aa,nz,SYSCALAR); CHKERRQ(ierr);
772   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
773 
774   /* set "a" and "j" values into matrix */
775   nzcount = 0; jcount = 0;
776   for ( i=0; i<mbs; i++ ) {
777     nzcountb = nzcount;
778     nmask    = 0;
779     for ( j=0; j<bs; j++ ) {
780       kmax = rowlengths[i*bs+j];
781       for ( k=0; k<kmax; k++ ) {
782         tmp = jj[nzcount++]/bs;
783 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
784       }
785       rowcount++;
786     }
787     /* sort the masked values */
788     SYIsort(nmask,masked);
789 
790     /* set "j" values into matrix */
791     maskcount = 1;
792     for ( j=0; j<nmask; j++ ) {
793       a->j[jcount++]  = masked[j];
794       mask[masked[j]] = maskcount++;
795     }
796     /* set "a" values into matrix */
797     ishift = bs2*a->i[i];
798     for ( j=0; j<bs; j++ ) {
799       kmax = rowlengths[i*bs+j];
800       for ( k=0; k<kmax; k++ ) {
801         tmp    = jj[nzcountb]/bs ;
802         block  = mask[tmp] - 1;
803         point  = jj[nzcountb] - bs*tmp;
804         idx    = ishift + bs2*block + j + bs*point;
805         a->a[idx] = aa[nzcountb++];
806       }
807     }
808     /* zero out the mask elements we set */
809     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
810   }
811   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
812 
813   PetscFree(rowlengths);
814   PetscFree(browlengths);
815   PetscFree(aa);
816   PetscFree(jj);
817   PetscFree(mask);
818 
819   B->assembled = PETSC_TRUE;
820 
821   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
822   if (flg) {
823     Viewer viewer;
824     ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr);
825     ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_INFO,0);CHKERRQ(ierr);
826     ierr = MatView(B,viewer); CHKERRQ(ierr);
827     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
828   }
829   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
830   if (flg) {
831     Viewer viewer;
832     ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr);
833     ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
834     ierr = MatView(B,viewer); CHKERRQ(ierr);
835     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
836   }
837   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
838   if (flg) {
839     Viewer viewer;
840     ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr);
841     ierr = MatView(B,viewer); CHKERRQ(ierr);
842     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
843   }
844   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
845   if (flg) {
846     Viewer viewer;
847     ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr);
848     ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_MATLAB,"M");CHKERRQ(ierr);
849     ierr = MatView(B,viewer); CHKERRQ(ierr);
850     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
851   }
852   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
853   if (flg) {
854     Draw    win;
855     ierr = DrawOpenX(B->comm,0,0,0,0,300,300,&win); CHKERRQ(ierr);
856     ierr = MatView(B,(Viewer)win); CHKERRQ(ierr);
857     ierr = DrawSyncFlush(win); CHKERRQ(ierr);
858     ierr = DrawDestroy(win); CHKERRQ(ierr);
859   }
860   return 0;
861 }
862 
863 
864 
865