xref: /petsc/src/mat/impls/baij/seq/baij.c (revision c752a4e5f57a3aac85b6ca038ac63f28f9587815)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.7 1996/02/20 18:52:36 curfman Exp bsmith $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,&ia,&ja);CHKERRQ(ierr);
41   ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
42 
43   PetscFree(ia); PetscFree(ja);
44   return 0;
45 }
46 
47 /*
48      Adds diagonal pointers to sparse matrix structure.
49 */
50 
51 int MatMarkDiag_SeqBAIJ(Mat A)
52 {
53   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
54   int         i,j, *diag, m = a->mbs;
55 
56   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
57   PLogObjectMemory(A,(m+1)*sizeof(int));
58   for ( i=0; i<m; i++ ) {
59     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
60       if (a->j[j] == i) {
61         diag[i] = j;
62         break;
63       }
64     }
65   }
66   a->diag = diag;
67   return 0;
68 }
69 
70 #include "draw.h"
71 #include "pinclude/pviewer.h"
72 #include "sysio.h"
73 
74 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
75 {
76   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
77   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l;
78   Scalar      *aa;
79 
80   ierr = ViewerFileGetDescriptor_Private(viewer,&fd); CHKERRQ(ierr);
81   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
82   col_lens[0] = MAT_COOKIE;
83   col_lens[1] = a->m;
84   col_lens[2] = a->n;
85   col_lens[3] = a->nz*bs*bs;
86 
87   /* store lengths of each row and write (including header) to file */
88   count = 0;
89   for ( i=0; i<a->mbs; i++ ) {
90     for ( j=0; j<bs; j++ ) {
91       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
92     }
93   }
94   ierr = SYWrite(fd,col_lens,4+a->m,SYINT,1); CHKERRQ(ierr);
95   PetscFree(col_lens);
96 
97   /* store column indices (zero start index) */
98   jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj);
99   count = 0;
100   for ( i=0; i<a->mbs; i++ ) {
101     for ( j=0; j<bs; j++ ) {
102       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
103         for ( l=0; l<bs; l++ ) {
104           jj[count++] = bs*a->j[k] + l;
105         }
106       }
107     }
108   }
109   ierr = SYWrite(fd,jj,bs*bs*a->nz,SYINT,0); CHKERRQ(ierr);
110   PetscFree(jj);
111 
112   /* store nonzero values */
113   aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa);
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
118         for ( l=0; l<bs; l++ ) {
119           aa[count++] = a->a[bs*bs*k + l*bs + j];
120         }
121       }
122     }
123   }
124   ierr = SYWrite(fd,aa,bs*bs*a->nz,SYSCALAR,0); CHKERRQ(ierr);
125   PetscFree(aa);
126   return 0;
127 }
128 
129 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
130 {
131   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
132   int         ierr, i,j,format,bs = a->bs,k,l;
133   FILE        *fd;
134   char        *outputname;
135 
136   ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
137   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
138   ierr = ViewerFileGetFormat_Private(viewer,&format);
139   if (format == FILE_FORMAT_INFO) {
140     /* no need to print additional information */ ;
141   }
142   else if (format == FILE_FORMAT_MATLAB) {
143     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
144   }
145   else {
146     for ( i=0; i<a->mbs; i++ ) {
147       for ( j=0; j<bs; j++ ) {
148         fprintf(fd,"row %d:",i*bs+j);
149         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
150           for ( l=0; l<bs; l++ ) {
151 #if defined(PETSC_COMPLEX)
152           if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) {
153             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
154               real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j]));
155           }
156           else {
157             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j]));
158           }
159 #else
160             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]);
161 #endif
162           }
163         }
164         fprintf(fd,"\n");
165       }
166     }
167   }
168   fflush(fd);
169   return 0;
170 }
171 
172 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
173 {
174   Mat         A = (Mat) obj;
175   PetscObject vobj = (PetscObject) viewer;
176 
177   if (!viewer) {
178     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
179   }
180   if (vobj->cookie == VIEWER_COOKIE) {
181     if (vobj->type == MATLAB_VIEWER) {
182       SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
183     }
184     else if (vobj->type == ASCII_FILE_VIEWER||vobj->type == ASCII_FILES_VIEWER){
185       return MatView_SeqBAIJ_ASCII(A,viewer);
186     }
187     else if (vobj->type == BINARY_FILE_VIEWER) {
188       return MatView_SeqBAIJ_Binary(A,viewer);
189     }
190   }
191   else if (vobj->cookie == DRAW_COOKIE) {
192     if (vobj->type == NULLWINDOW) return 0;
193     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
194   }
195   return 0;
196 }
197 
198 
199 static int MatZeroEntries_SeqBAIJ(Mat A)
200 {
201   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
202   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
203   return 0;
204 }
205 
206 int MatDestroy_SeqBAIJ(PetscObject obj)
207 {
208   Mat        A  = (Mat) obj;
209   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
210 
211 #if defined(PETSC_LOG)
212   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
213 #endif
214   PetscFree(a->a);
215   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
216   if (a->diag) PetscFree(a->diag);
217   if (a->ilen) PetscFree(a->ilen);
218   if (a->imax) PetscFree(a->imax);
219   if (a->solve_work) PetscFree(a->solve_work);
220   if (a->mult_work) PetscFree(a->mult_work);
221   PetscFree(a);
222   PLogObjectDestroy(A);
223   PetscHeaderDestroy(A);
224   return 0;
225 }
226 
227 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
228 {
229   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
230   if      (op == ROW_ORIENTED)              a->roworiented = 1;
231   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
232   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
233   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
234   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
235   else if (op == ROWS_SORTED ||
236            op == SYMMETRIC_MATRIX ||
237            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
238            op == YES_NEW_DIAGONALS)
239     PLogInfo((PetscObject)A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
240   else if (op == NO_NEW_DIAGONALS)
241     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
242   else
243     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
244   return 0;
245 }
246 
247 
248 /* -------------------------------------------------------*/
249 /* Should check that shapes of vectors and matrices match */
250 /* -------------------------------------------------------*/
251 #include "pinclude/plapack.h"
252 
253 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
254 {
255   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
256   Scalar          *xg,*yg;
257   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
258   register Scalar x1,x2,x3,x4,x5;
259   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
260   int             bs = a->bs,j,n,bs2 = bs*bs;
261 
262   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
263   PetscMemzero(y,m*sizeof(Scalar));
264   x     = x;
265   idx   = a->j;
266   v     = a->a;
267   ii    = a->i;
268 
269   switch (bs) {
270     case 1:
271      for ( i=0; i<m; i++ ) {
272        n    = ii[1] - ii[0]; ii++;
273        sum  = 0.0;
274        while (n--) sum += *v++ * x[*idx++];
275        y[i] = sum;
276       }
277       break;
278     case 2:
279       for ( i=0; i<mbs; i++ ) {
280         n  = ii[1] - ii[0]; ii++;
281         sum1 = 0.0; sum2 = 0.0;
282         for ( j=0; j<n; j++ ) {
283           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
284           sum1 += v[0]*x1 + v[2]*x2;
285           sum2 += v[1]*x1 + v[3]*x2;
286           v += 4;
287         }
288         y[0] += sum1; y[1] += sum2;
289         y += 2;
290       }
291       break;
292     case 3:
293       for ( i=0; i<mbs; i++ ) {
294         n  = ii[1] - ii[0]; ii++;
295         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
296         for ( j=0; j<n; j++ ) {
297           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
298           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
299           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
300           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
301           v += 9;
302         }
303         y[0] += sum1; y[1] += sum2; y[2] += sum3;
304         y += 3;
305       }
306       break;
307     case 4:
308       for ( i=0; i<mbs; i++ ) {
309         n  = ii[1] - ii[0]; ii++;
310         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
311         for ( j=0; j<n; j++ ) {
312           xb = x + 4*(*idx++);
313           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
314           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
315           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
316           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
317           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
318           v += 16;
319         }
320         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
321         y += 4;
322       }
323       break;
324     case 5:
325       for ( i=0; i<mbs; i++ ) {
326         n  = ii[1] - ii[0]; ii++;
327         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
328         for ( j=0; j<n; j++ ) {
329           xb = x + 5*(*idx++);
330           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
331           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
332           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
333           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
334           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
335           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
336           v += 25;
337         }
338         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
339         y += 5;
340       }
341       break;
342       /* block sizes larger then 5 by 5 are handled by BLAS */
343     default: {
344       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
345       if (!a->mult_work) {
346         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
347         CHKPTRQ(a->mult_work);
348       }
349       work = a->mult_work;
350       for ( i=0; i<mbs; i++ ) {
351         n     = ii[1] - ii[0]; ii++;
352         ncols = n*bs;
353         workt = work;
354         for ( j=0; j<n; j++ ) {
355           xb = x + bs*(*idx++);
356           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
357           workt += bs;
358         }
359         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
360         v += n*bs2;
361         y += bs;
362       }
363     }
364   }
365   PLogFlops(2*bs2*a->nz - m);
366   return 0;
367 }
368 
369 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
370 {
371   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
372   *nz  = a->bs*a->bs*a->nz;
373   *nza = a->maxnz;
374   *mem = (int)A->mem;
375   return 0;
376 }
377 
378 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
379 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
380 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
381 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
382 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
383 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
384 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
385 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
386 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
387 
388 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
389 {
390   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
391   *m = a->m; *n = a->n;
392   return 0;
393 }
394 
395 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
396 {
397   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
398   *m = 0; *n = a->m;
399   return 0;
400 }
401 
402 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
403 {
404   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
405   Scalar      *v = a->a;
406   double      sum = 0.0;
407   int         i;
408 
409   if (type == NORM_FROBENIUS) {
410     for (i=0; i<a->nz; i++ ) {
411 #if defined(PETSC_COMPLEX)
412       sum += real(conj(*v)*(*v)); v++;
413 #else
414       sum += (*v)*(*v); v++;
415 #endif
416     }
417     *norm = sqrt(sum);
418   }
419   else {
420     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
421   }
422   return 0;
423 }
424 
425 /*
426      note: This can only work for identity for row and col. It would
427    be good to check this and otherwise generate an error.
428 */
429 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
430 {
431   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
432   Mat         outA;
433   int         ierr;
434 
435   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
436 
437   outA          = inA;
438   inA->factor   = FACTOR_LU;
439   a->row        = row;
440   a->col        = col;
441 
442   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
443 
444   if (!a->diag) {
445     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
446   }
447   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
448   return 0;
449 }
450 
451 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
452 {
453   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
454   int         one = 1, totalnz = a->bs*a->bs*a->nz;
455   BLscal_( &totalnz, alpha, a->a, &one );
456   PLogFlops(totalnz);
457   return 0;
458 }
459 
460 int MatPrintHelp_SeqBAIJ(Mat A)
461 {
462   static int called = 0;
463 
464   if (called) return 0; else called = 1;
465   return 0;
466 }
467 /* -------------------------------------------------------------------*/
468 static struct _MatOps MatOps = {0,
469        0,0,
470        MatMult_SeqBAIJ,0,
471        0,0,
472        MatSolve_SeqBAIJ,0,
473        0,0,
474        MatLUFactor_SeqBAIJ,0,
475        0,
476        0,
477        MatGetInfo_SeqBAIJ,0,
478        0,0,MatNorm_SeqBAIJ,
479        0,0,
480        0,
481        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
482        MatGetReordering_SeqBAIJ,
483        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
484        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
485        MatILUFactorSymbolic_SeqBAIJ,0,
486        0,0,/*MatConvert_SeqBAIJ*/ 0,
487        0,0,
488        MatConvertSameType_SeqBAIJ,0,0,
489        MatILUFactor_SeqBAIJ,0,0,
490        0,0,
491        0,0,
492        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
493        0};
494 
495 /*@C
496    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
497    (the default parallel PETSc format).  For good matrix assembly performance
498    the user should preallocate the matrix storage by setting the parameter nz
499    (or nzz).  By setting these parameters accurately, performance can be
500    increased by more than a factor of 50.
501 
502    Input Parameters:
503 .  comm - MPI communicator, set to MPI_COMM_SELF
504 .  bs - size of block
505 .  m - number of rows
506 .  n - number of columns
507 .  nz - number of block nonzeros per block row (same for all rows)
508 .  nzz - number of block nonzeros per block row or PETSC_NULL
509          (possibly different for each row)
510 
511    Output Parameter:
512 .  A - the matrix
513 
514    Notes:
515    The BAIJ format, is fully compatible with standard Fortran 77
516    storage.  That is, the stored row and column indices can begin at
517    either one (as in Fortran) or zero.  See the users manual for details.
518 
519    Specify the preallocated storage with either nz or nnz (not both).
520    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
521    allocation.  For additional details, see the users manual chapter on
522    matrices and the file $(PETSC_DIR)/Performance.
523 
524 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
525 @*/
526 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
527 {
528   Mat         B;
529   Mat_SeqBAIJ *b;
530   int         i,len,ierr, flg,mbs = m/bs;
531 
532   if (mbs*bs != m)
533     SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize");
534 
535   *A      = 0;
536   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
537   PLogObjectCreate(B);
538   B->data             = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
539   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
540   switch (bs) {
541     case 1:
542       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
543       break;
544     case 2:
545       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
546     case 3:
547       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
548     break;
549   }
550   B->destroy          = MatDestroy_SeqBAIJ;
551   B->view             = MatView_SeqBAIJ;
552   B->factor           = 0;
553   B->lupivotthreshold = 1.0;
554   b->row              = 0;
555   b->col              = 0;
556   b->reallocs         = 0;
557 
558   b->m       = m;
559   b->mbs     = mbs;
560   b->n       = n;
561   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
562   if (nnz == PETSC_NULL) {
563     if (nz == PETSC_DEFAULT) nz = 5;
564     else if (nz <= 0)        nz = 1;
565     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
566     nz = nz*mbs;
567   }
568   else {
569     nz = 0;
570     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
571   }
572 
573   /* allocate the matrix space */
574   len   = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int);
575   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
576   PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar));
577   b->j  = (int *) (b->a + nz*bs*bs);
578   PetscMemzero(b->j,nz*sizeof(int));
579   b->i  = b->j + nz;
580   b->singlemalloc = 1;
581 
582   b->i[0] = 0;
583   for (i=1; i<mbs+1; i++) {
584     b->i[i] = b->i[i-1] + b->imax[i-1];
585   }
586 
587   /* b->ilen will count nonzeros in each block row so far. */
588   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
589   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
590   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
591 
592   b->bs               = bs;
593   b->mbs              = mbs;
594   b->nz               = 0;
595   b->maxnz            = nz;
596   b->sorted           = 0;
597   b->roworiented      = 1;
598   b->nonew            = 0;
599   b->diag             = 0;
600   b->solve_work       = 0;
601   b->mult_work        = 0;
602   b->spptr            = 0;
603 
604   *A = B;
605   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
606   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
607   return 0;
608 }
609 
610 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
611 {
612   Mat         C;
613   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
614   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz;
615 
616   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
617 
618   *B = 0;
619   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
620   PLogObjectCreate(C);
621   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
622   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
623   C->destroy    = MatDestroy_SeqBAIJ;
624   C->view       = MatView_SeqBAIJ;
625   C->factor     = A->factor;
626   c->row        = 0;
627   c->col        = 0;
628   C->assembled  = PETSC_TRUE;
629 
630   c->m          = a->m;
631   c->n          = a->n;
632   c->bs         = a->bs;
633   c->mbs        = a->mbs;
634   c->nbs        = a->nbs;
635 
636   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
637   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
638   for ( i=0; i<mbs; i++ ) {
639     c->imax[i] = a->imax[i];
640     c->ilen[i] = a->ilen[i];
641   }
642 
643   /* allocate the matrix space */
644   c->singlemalloc = 1;
645   len   = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int));
646   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
647   c->j  = (int *) (c->a + nz*bs*bs);
648   c->i  = c->j + nz;
649   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
650   if (mbs > 0) {
651     PetscMemcpy(c->j,a->j,nz*sizeof(int));
652     if (cpvalues == COPY_VALUES) {
653       PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar));
654     }
655   }
656 
657   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
658   c->sorted      = a->sorted;
659   c->roworiented = a->roworiented;
660   c->nonew       = a->nonew;
661 
662   if (a->diag) {
663     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
664     PLogObjectMemory(C,(mbs+1)*sizeof(int));
665     for ( i=0; i<mbs; i++ ) {
666       c->diag[i] = a->diag[i];
667     }
668   }
669   else c->diag          = 0;
670   c->nz                 = a->nz;
671   c->maxnz              = a->maxnz;
672   c->solve_work         = 0;
673   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
674   c->mult_work          = 0;
675   *B = C;
676   return 0;
677 }
678 
679 int MatLoad_SeqBAIJ(Viewer bview,MatType type,Mat *A)
680 {
681   Mat_SeqBAIJ  *a;
682   Mat          B;
683   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
684   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
685   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
686   int          *masked, nmask,tmp,ishift,bs2;
687   Scalar       *aa;
688   PetscObject  vobj = (PetscObject) bview;
689   MPI_Comm     comm = vobj->comm;
690 
691   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
692   bs2  = bs*bs;
693 
694   MPI_Comm_size(comm,&size);
695   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
696   ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
697   ierr = SYRead(fd,header,4,SYINT); CHKERRQ(ierr);
698   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
699   M = header[1]; N = header[2]; nz = header[3];
700 
701   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
702 
703   /*
704      This code adds extra rows to make sure the number of rows is
705     divisible by the blocksize
706   */
707   mbs        = M/bs;
708   extra_rows = bs - M + bs*(mbs);
709   if (extra_rows == bs) extra_rows = 0;
710   else                  mbs++;
711   if (extra_rows) {
712     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
713   }
714 
715   /* read in row lengths */
716   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
717   ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
718   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
719 
720   /* read in column indices */
721   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
722   ierr = SYRead(fd,jj,nz,SYINT); CHKERRQ(ierr);
723   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
724 
725   /* loop over row lengths determining block row lengths */
726   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
727   PetscMemzero(browlengths,mbs*sizeof(int));
728   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
729   PetscMemzero(mask,mbs*sizeof(int));
730   masked = mask + mbs;
731   rowcount = 0; nzcount = 0;
732   for ( i=0; i<mbs; i++ ) {
733     nmask = 0;
734     for ( j=0; j<bs; j++ ) {
735       kmax = rowlengths[rowcount];
736       for ( k=0; k<kmax; k++ ) {
737         tmp = jj[nzcount++]/bs;
738         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
739       }
740       rowcount++;
741     }
742     browlengths[i] += nmask;
743     /* zero out the mask elements we set */
744     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
745   }
746 
747   /* create our matrix */
748   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
749          CHKERRQ(ierr);
750   B = *A;
751   a = (Mat_SeqBAIJ *) B->data;
752 
753   /* set matrix "i" values */
754   a->i[0] = 0;
755   for ( i=1; i<= mbs; i++ ) {
756     a->i[i]      = a->i[i-1] + browlengths[i-1];
757     a->ilen[i-1] = browlengths[i-1];
758   }
759   a->nz = 0;
760   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
761 
762   /* read in nonzero values */
763   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
764   ierr = SYRead(fd,aa,nz,SYSCALAR); CHKERRQ(ierr);
765   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
766 
767   /* set "a" and "j" values into matrix */
768   nzcount = 0; jcount = 0;
769   for ( i=0; i<mbs; i++ ) {
770     nzcountb = nzcount;
771     nmask    = 0;
772     for ( j=0; j<bs; j++ ) {
773       kmax = rowlengths[i*bs+j];
774       for ( k=0; k<kmax; k++ ) {
775         tmp = jj[nzcount++]/bs;
776 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
777       }
778       rowcount++;
779     }
780     /* sort the masked values */
781     SYIsort(nmask,masked);
782 
783     /* set "j" values into matrix */
784     maskcount = 1;
785     for ( j=0; j<nmask; j++ ) {
786       a->j[jcount++]  = masked[j];
787       mask[masked[j]] = maskcount++;
788     }
789     /* set "a" values into matrix */
790     ishift = bs2*a->i[i];
791     for ( j=0; j<bs; j++ ) {
792       kmax = rowlengths[i*bs+j];
793       for ( k=0; k<kmax; k++ ) {
794         tmp    = jj[nzcountb]/bs ;
795         block  = mask[tmp] - 1;
796         point  = jj[nzcountb] - bs*tmp;
797         idx    = ishift + bs2*block + j + bs*point;
798         a->a[idx] = aa[nzcountb++];
799       }
800     }
801     /* zero out the mask elements we set */
802     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
803   }
804   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
805 
806   PetscFree(rowlengths);
807   PetscFree(browlengths);
808   PetscFree(aa);
809   PetscFree(jj);
810   PetscFree(mask);
811 
812   B->assembled = PETSC_TRUE;
813 
814   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
815   if (flg) {
816     Viewer viewer;
817     ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr);
818     ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_INFO,0);CHKERRQ(ierr);
819     ierr = MatView(B,viewer); CHKERRQ(ierr);
820     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
821   }
822   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
823   if (flg) {
824     Viewer viewer;
825     ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr);
826     ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
827     ierr = MatView(B,viewer); CHKERRQ(ierr);
828     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
829   }
830   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
831   if (flg) {
832     Viewer viewer;
833     ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr);
834     ierr = MatView(B,viewer); CHKERRQ(ierr);
835     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
836   }
837   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
838   if (flg) {
839     Viewer viewer;
840     ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr);
841     ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_MATLAB,"M");CHKERRQ(ierr);
842     ierr = MatView(B,viewer); CHKERRQ(ierr);
843     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
844   }
845   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
846   if (flg) {
847     Draw    win;
848     ierr = DrawOpenX(B->comm,0,0,0,0,300,300,&win); CHKERRQ(ierr);
849     ierr = MatView(B,(Viewer)win); CHKERRQ(ierr);
850     ierr = DrawSyncFlush(win); CHKERRQ(ierr);
851     ierr = DrawDestroy(win); CHKERRQ(ierr);
852   }
853   return 0;
854 }
855 
856 
857 
858