xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 0b824a480752e09606a2525fc849a9d89d7ee6f2)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.20 1996/03/27 00:06:28 balay Exp balay $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = a->indexshift;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45     CHKERRQ(ierr);
46     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
47     PetscFree(ia); PetscFree(ja);
48   } else {
49     if (ishift == oshift) {
50       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51     }
52     else if (ishift == -1) {
53       /* temporarily subtract 1 from i and j indices */
54       int nz = a->i[a->n] - 1;
55       for ( i=0; i<nz; i++ ) a->j[i]--;
56       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58       for ( i=0; i<nz; i++ ) a->j[i]++;
59       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60     } else {
61       /* temporarily add 1 to i and j indices */
62       int nz = a->i[a->n] - 1;
63       for ( i=0; i<nz; i++ ) a->j[i]++;
64       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66       for ( i=0; i<nz; i++ ) a->j[i]--;
67       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68     }
69   }
70   return 0;
71 }
72 
73 /*
74      Adds diagonal pointers to sparse matrix structure.
75 */
76 
77 int MatMarkDiag_SeqBAIJ(Mat A)
78 {
79   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80   int         i,j, *diag, m = a->mbs;
81 
82   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83   PLogObjectMemory(A,(m+1)*sizeof(int));
84   for ( i=0; i<m; i++ ) {
85     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86       if (a->j[j] == i) {
87         diag[i] = j;
88         break;
89       }
90     }
91   }
92   a->diag = diag;
93   return 0;
94 }
95 
96 #include "draw.h"
97 #include "pinclude/pviewer.h"
98 #include "sys.h"
99 
100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
101 {
102   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
103   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l;
104   Scalar      *aa;
105 
106   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
107   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
108   col_lens[0] = MAT_COOKIE;
109   col_lens[1] = a->m;
110   col_lens[2] = a->n;
111   col_lens[3] = a->nz*bs*bs;
112 
113   /* store lengths of each row and write (including header) to file */
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118     }
119   }
120   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
121   PetscFree(col_lens);
122 
123   /* store column indices (zero start index) */
124   jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj);
125   count = 0;
126   for ( i=0; i<a->mbs; i++ ) {
127     for ( j=0; j<bs; j++ ) {
128       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129         for ( l=0; l<bs; l++ ) {
130           jj[count++] = bs*a->j[k] + l;
131         }
132       }
133     }
134   }
135   ierr = PetscBinaryWrite(fd,jj,bs*bs*a->nz,BINARY_INT,0); CHKERRQ(ierr);
136   PetscFree(jj);
137 
138   /* store nonzero values */
139   aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa);
140   count = 0;
141   for ( i=0; i<a->mbs; i++ ) {
142     for ( j=0; j<bs; j++ ) {
143       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144         for ( l=0; l<bs; l++ ) {
145           aa[count++] = a->a[bs*bs*k + l*bs + j];
146         }
147       }
148     }
149   }
150   ierr = PetscBinaryWrite(fd,aa,bs*bs*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
151   PetscFree(aa);
152   return 0;
153 }
154 
155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
156 {
157   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
158   int         ierr, i,j,format,bs = a->bs,k,l;
159   FILE        *fd;
160   char        *outputname;
161 
162   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
163   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
164   ierr = ViewerGetFormat(viewer,&format);
165   if (format == ASCII_FORMAT_INFO) {
166     /* no need to print additional information */ ;
167   }
168   else if (format == ASCII_FORMAT_MATLAB) {
169     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
170   }
171   else {
172     for ( i=0; i<a->mbs; i++ ) {
173       for ( j=0; j<bs; j++ ) {
174         fprintf(fd,"row %d:",i*bs+j);
175         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
176           for ( l=0; l<bs; l++ ) {
177 #if defined(PETSC_COMPLEX)
178           if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) {
179             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
180               real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j]));
181           }
182           else {
183             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j]));
184           }
185 #else
186             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]);
187 #endif
188           }
189         }
190         fprintf(fd,"\n");
191       }
192     }
193   }
194   fflush(fd);
195   return 0;
196 }
197 
198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
199 {
200   Mat         A = (Mat) obj;
201   ViewerType  vtype;
202   int         ierr;
203 
204   if (!viewer) {
205     viewer = STDOUT_VIEWER_SELF;
206   }
207 
208   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
209   if (vtype == MATLAB_VIEWER) {
210     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
211   }
212   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
213     return MatView_SeqBAIJ_ASCII(A,viewer);
214   }
215   else if (vtype == BINARY_FILE_VIEWER) {
216     return MatView_SeqBAIJ_Binary(A,viewer);
217   }
218   else if (vtype == DRAW_VIEWER) {
219     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
220   }
221   return 0;
222 }
223 
224 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
225 {
226   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
227   *m = a->m; *n = a->n;
228   return 0;
229 }
230 
231 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
232 {
233   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
234   *m = 0; *n = a->m;
235   return 0;
236 }
237 
238 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
239 {
240   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
241   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i;
242   Scalar      *aa,*v_i,*aa_i;
243 
244   bs = a->bs;
245   ai = a->i;
246   aj = a->j;
247   aa = a->a;
248 
249   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
250 
251   bn  = row/bs;   /* Block number */
252   bp  = row % bs; /* Block Position */
253   M   = ai[bn+1] - ai[bn];
254   *nz = bs*M;
255 
256   if (v) {
257     *v = 0;
258     if (*nz) {
259       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
260       for ( i=0; i<M; i++ ) { /* for each block in the block row */
261         v_i  = *v + i*bs;
262         aa_i = aa + bs*bs*(ai[bn] + i);
263         for ( j=bp,k=0; j<bs*bs; j+=bs,k++ ) {v_i[k] = aa_i[j];}
264       }
265     }
266   }
267 
268   if (idx) {
269     *idx = 0;
270     if (*nz) {
271       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
272       for ( i=0; i<M; i++ ) { /* for each block in the block row */
273         idx_i = *idx + i*bs;
274         itmp  = bs*aj[ai[bn] + i];
275         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
276       }
277     }
278   }
279   return 0;
280 }
281 
282 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
283 {
284   if (idx) {if (*idx) PetscFree(*idx);}
285   if (v)   {if (*v)   PetscFree(*v);}
286   return 0;
287 }
288 
289 static int MatZeroEntries_SeqBAIJ(Mat A)
290 {
291   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
292   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
293   return 0;
294 }
295 
296 int MatDestroy_SeqBAIJ(PetscObject obj)
297 {
298   Mat        A  = (Mat) obj;
299   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
300 
301 #if defined(PETSC_LOG)
302   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
303 #endif
304   PetscFree(a->a);
305   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
306   if (a->diag) PetscFree(a->diag);
307   if (a->ilen) PetscFree(a->ilen);
308   if (a->imax) PetscFree(a->imax);
309   if (a->solve_work) PetscFree(a->solve_work);
310   if (a->mult_work) PetscFree(a->mult_work);
311   PetscFree(a);
312   return 0;
313 }
314 
315 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
316 {
317   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
318   if      (op == ROW_ORIENTED)              a->roworiented = 1;
319   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
320   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
321   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
322   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
323   else if (op == ROWS_SORTED ||
324            op == SYMMETRIC_MATRIX ||
325            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
326            op == YES_NEW_DIAGONALS)
327     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
328   else if (op == NO_NEW_DIAGONALS)
329     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
330   else
331     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
332   return 0;
333 }
334 
335 
336 /* -------------------------------------------------------*/
337 /* Should check that shapes of vectors and matrices match */
338 /* -------------------------------------------------------*/
339 #include "pinclude/plapack.h"
340 
341 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
342 {
343   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
344   Scalar          *xg,*yg;
345   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
346   register Scalar x1,x2,x3,x4,x5;
347   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
348   int             bs = a->bs,j,n,bs2 = bs*bs;
349 
350   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
351   PetscMemzero(y,m*sizeof(Scalar));
352   x     = x;
353   idx   = a->j;
354   v     = a->a;
355   ii    = a->i;
356 
357   switch (bs) {
358     case 1:
359      for ( i=0; i<m; i++ ) {
360        n    = ii[1] - ii[0]; ii++;
361        sum  = 0.0;
362        while (n--) sum += *v++ * x[*idx++];
363        y[i] = sum;
364       }
365       break;
366     case 2:
367       for ( i=0; i<mbs; i++ ) {
368         n  = ii[1] - ii[0]; ii++;
369         sum1 = 0.0; sum2 = 0.0;
370         for ( j=0; j<n; j++ ) {
371           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
372           sum1 += v[0]*x1 + v[2]*x2;
373           sum2 += v[1]*x1 + v[3]*x2;
374           v += 4;
375         }
376         y[0] += sum1; y[1] += sum2;
377         y += 2;
378       }
379       break;
380     case 3:
381       for ( i=0; i<mbs; i++ ) {
382         n  = ii[1] - ii[0]; ii++;
383         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
384         for ( j=0; j<n; j++ ) {
385           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
386           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
387           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
388           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
389           v += 9;
390         }
391         y[0] += sum1; y[1] += sum2; y[2] += sum3;
392         y += 3;
393       }
394       break;
395     case 4:
396       for ( i=0; i<mbs; i++ ) {
397         n  = ii[1] - ii[0]; ii++;
398         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
399         for ( j=0; j<n; j++ ) {
400           xb = x + 4*(*idx++);
401           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
402           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
403           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
404           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
405           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
406           v += 16;
407         }
408         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
409         y += 4;
410       }
411       break;
412     case 5:
413       for ( i=0; i<mbs; i++ ) {
414         n  = ii[1] - ii[0]; ii++;
415         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
416         for ( j=0; j<n; j++ ) {
417           xb = x + 5*(*idx++);
418           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
419           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
420           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
421           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
422           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
423           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
424           v += 25;
425         }
426         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
427         y += 5;
428       }
429       break;
430       /* block sizes larger then 5 by 5 are handled by BLAS */
431     default: {
432       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
433       if (!a->mult_work) {
434         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
435         CHKPTRQ(a->mult_work);
436       }
437       work = a->mult_work;
438       for ( i=0; i<mbs; i++ ) {
439         n     = ii[1] - ii[0]; ii++;
440         ncols = n*bs;
441         workt = work;
442         for ( j=0; j<n; j++ ) {
443           xb = x + bs*(*idx++);
444           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
445           workt += bs;
446         }
447         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
448         v += n*bs2;
449         y += bs;
450       }
451     }
452   }
453   PLogFlops(2*bs2*a->nz - m);
454   return 0;
455 }
456 
457 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
458 {
459   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
460   if (nz)  *nz  = a->bs*a->bs*a->nz;
461   if (nza) *nza = a->maxnz;
462   if (mem) *mem = (int)A->mem;
463   return 0;
464 }
465 
466 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
467 {
468   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
469 
470   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
471 
472   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
473   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
474       (a->nz != b->nz)||(a->indexshift != b->indexshift)) {
475     *flg = PETSC_FALSE; return 0;
476   }
477 
478   /* if the a->i are the same */
479   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
480     *flg = PETSC_FALSE; return 0;
481   }
482 
483   /* if a->j are the same */
484   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
485     *flg = PETSC_FALSE; return 0;
486   }
487 
488   /* if a->a are the same */
489   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
490     *flg = PETSC_FALSE; return 0;
491   }
492   *flg = PETSC_TRUE;
493   return 0;
494 
495 }
496 
497 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
498 {
499   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
500   int         i,j,k,n,row,bs,*ai,*aj,ambs;
501   Scalar      *x, zero = 0.0,*aa,*aa_j;
502 
503   bs  = a->bs;
504   aa   = a->a;
505   ai   = a->i;
506   aj   = a->j;
507   ambs = a->mbs;
508 
509   VecSet(&zero,v);
510   VecGetArray(v,&x); VecGetLocalSize(v,&n);
511   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
512   for ( i=0; i<ambs; i++ ) {
513     for ( j=ai[i]; j<ai[i+1]; j++ ) {
514       if (aj[j] == i) {
515         row  = i*bs;
516         aa_j = aa+j*bs*bs;
517         for (k=0; k<bs*bs; k+=(bs+1),row++) x[row] = aa_j[k];
518         break;
519       }
520     }
521   }
522   return 0;
523 }
524 
525 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
526 {
527   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
528   Scalar      *l,*r,x,*v,*aa,*li,*ri;
529   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs;
530 
531   ai  = a->i;
532   aj  = a->j;
533   aa  = a->a;
534   m   = a->m;
535   n   = a->n;
536   bs  = a->bs;
537   mbs = a->mbs;
538 
539   if (ll) {
540     VecGetArray(ll,&l); VecGetSize(ll,&lm);
541     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
542     for ( i=0; i<mbs; i++ ) { /* for each block row */
543       M  = ai[i+1] - ai[i];
544       li = l + i*bs;
545       v  = aa + bs*bs*ai[i];
546       for ( j=0; j<M; j++ ) { /* for each block */
547         for ( k=0; k<bs*bs; k++ ) {
548           (*v++) *= li[k%bs];
549         }
550       }
551     }
552   }
553 
554   if (rr) {
555     VecGetArray(rr,&r); VecGetSize(rr,&rn);
556     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
557     for ( i=0; i<mbs; i++ ) { /* for each block row */
558       M  = ai[i+1] - ai[i];
559       v  = aa + bs*bs*ai[i];
560       for ( j=0; j<M; j++ ) { /* for each block */
561         ri = r + bs*aj[ai[i]+j];
562         for ( k=0; k<bs; k++ ) {
563           x = ri[k];
564           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
565         }
566       }
567     }
568   }
569   return 0;
570 }
571 
572 
573 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
574 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
575 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
576 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
577 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
578 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
579 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
580 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
581 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
582 
583 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
584 {
585   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
586   *m = a->m; *n = a->n;
587   return 0;
588 }
589 
590 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
591 {
592   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
593   *m = 0; *n = a->m;
594   return 0;
595 }
596 
597 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
598 {
599   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
600   Scalar      *v = a->a;
601   double      sum = 0.0;
602   int         i;
603 
604   if (type == NORM_FROBENIUS) {
605     for (i=0; i<a->nz; i++ ) {
606 #if defined(PETSC_COMPLEX)
607       sum += real(conj(*v)*(*v)); v++;
608 #else
609       sum += (*v)*(*v); v++;
610 #endif
611     }
612     *norm = sqrt(sum);
613   }
614   else {
615     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
616   }
617   return 0;
618 }
619 
620 /*
621      note: This can only work for identity for row and col. It would
622    be good to check this and otherwise generate an error.
623 */
624 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
625 {
626   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
627   Mat         outA;
628   int         ierr;
629 
630   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
631 
632   outA          = inA;
633   inA->factor   = FACTOR_LU;
634   a->row        = row;
635   a->col        = col;
636 
637   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
638 
639   if (!a->diag) {
640     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
641   }
642   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
643   return 0;
644 }
645 
646 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
647 {
648   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
649   int         one = 1, totalnz = a->bs*a->bs*a->nz;
650   BLscal_( &totalnz, alpha, a->a, &one );
651   PLogFlops(totalnz);
652   return 0;
653 }
654 
655 int MatPrintHelp_SeqBAIJ(Mat A)
656 {
657   static int called = 0;
658 
659   if (called) return 0; else called = 1;
660   return 0;
661 }
662 /* -------------------------------------------------------------------*/
663 static struct _MatOps MatOps = {0,
664        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
665        MatMult_SeqBAIJ,0,
666        0,0,
667        MatSolve_SeqBAIJ,0,
668        0,0,
669        MatLUFactor_SeqBAIJ,0,
670        0,
671        0,
672        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
673        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
674        0,0,
675        0,
676        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
677        MatGetReordering_SeqBAIJ,
678        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
679        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
680        MatILUFactorSymbolic_SeqBAIJ,0,
681        0,0,/*  MatConvert_SeqBAIJ  */ 0,
682        0,0,
683        MatConvertSameType_SeqBAIJ,0,0,
684        MatILUFactor_SeqBAIJ,0,0,
685        0,0,
686        0,0,
687        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
688        0};
689 
690 /*@C
691    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
692    (the default parallel PETSc format).  For good matrix assembly performance
693    the user should preallocate the matrix storage by setting the parameter nz
694    (or nzz).  By setting these parameters accurately, performance can be
695    increased by more than a factor of 50.
696 
697    Input Parameters:
698 .  comm - MPI communicator, set to MPI_COMM_SELF
699 .  bs - size of block
700 .  m - number of rows
701 .  n - number of columns
702 .  nz - number of block nonzeros per block row (same for all rows)
703 .  nzz - number of block nonzeros per block row or PETSC_NULL
704          (possibly different for each row)
705 
706    Output Parameter:
707 .  A - the matrix
708 
709    Notes:
710    The BAIJ format, is fully compatible with standard Fortran 77
711    storage.  That is, the stored row and column indices can begin at
712    either one (as in Fortran) or zero.  See the users manual for details.
713 
714    Specify the preallocated storage with either nz or nnz (not both).
715    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
716    allocation.  For additional details, see the users manual chapter on
717    matrices and the file $(PETSC_DIR)/Performance.
718 
719 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
720 @*/
721 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
722 {
723   Mat         B;
724   Mat_SeqBAIJ *b;
725   int         i,len,ierr, flg,mbs = m/bs;
726 
727   if (mbs*bs != m)
728     SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize");
729 
730   *A      = 0;
731   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
732   PLogObjectCreate(B);
733   B->data             = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
734   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
735   switch (bs) {
736     case 1:
737       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
738       break;
739     case 2:
740       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
741       break;
742     case 3:
743       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
744       break;
745     case 4:
746       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
747       break;
748     case 5:
749       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
750       break;
751   }
752   B->destroy          = MatDestroy_SeqBAIJ;
753   B->view             = MatView_SeqBAIJ;
754   B->factor           = 0;
755   B->lupivotthreshold = 1.0;
756   b->row              = 0;
757   b->col              = 0;
758   b->reallocs         = 0;
759 
760   b->m       = m;
761   b->mbs     = mbs;
762   b->n       = n;
763   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
764   if (nnz == PETSC_NULL) {
765     if (nz == PETSC_DEFAULT) nz = 5;
766     else if (nz <= 0)        nz = 1;
767     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
768     nz = nz*mbs;
769   }
770   else {
771     nz = 0;
772     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
773   }
774 
775   /* allocate the matrix space */
776   len   = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int);
777   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
778   PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar));
779   b->j  = (int *) (b->a + nz*bs*bs);
780   PetscMemzero(b->j,nz*sizeof(int));
781   b->i  = b->j + nz;
782   b->singlemalloc = 1;
783 
784   b->i[0] = 0;
785   for (i=1; i<mbs+1; i++) {
786     b->i[i] = b->i[i-1] + b->imax[i-1];
787   }
788 
789   /* b->ilen will count nonzeros in each block row so far. */
790   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
791   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
792   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
793 
794   b->bs               = bs;
795   b->mbs              = mbs;
796   b->nz               = 0;
797   b->maxnz            = nz;
798   b->sorted           = 0;
799   b->roworiented      = 1;
800   b->nonew            = 0;
801   b->diag             = 0;
802   b->solve_work       = 0;
803   b->mult_work        = 0;
804   b->spptr            = 0;
805 
806   *A = B;
807   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
808   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
809   return 0;
810 }
811 
812 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
813 {
814   Mat         C;
815   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
816   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz;
817 
818   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
819 
820   *B = 0;
821   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
822   PLogObjectCreate(C);
823   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
824   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
825   C->destroy    = MatDestroy_SeqBAIJ;
826   C->view       = MatView_SeqBAIJ;
827   C->factor     = A->factor;
828   c->row        = 0;
829   c->col        = 0;
830   C->assembled  = PETSC_TRUE;
831 
832   c->m          = a->m;
833   c->n          = a->n;
834   c->bs         = a->bs;
835   c->mbs        = a->mbs;
836   c->nbs        = a->nbs;
837 
838   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
839   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
840   for ( i=0; i<mbs; i++ ) {
841     c->imax[i] = a->imax[i];
842     c->ilen[i] = a->ilen[i];
843   }
844 
845   /* allocate the matrix space */
846   c->singlemalloc = 1;
847   len   = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int));
848   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
849   c->j  = (int *) (c->a + nz*bs*bs);
850   c->i  = c->j + nz;
851   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
852   if (mbs > 0) {
853     PetscMemcpy(c->j,a->j,nz*sizeof(int));
854     if (cpvalues == COPY_VALUES) {
855       PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar));
856     }
857   }
858 
859   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
860   c->sorted      = a->sorted;
861   c->roworiented = a->roworiented;
862   c->nonew       = a->nonew;
863 
864   if (a->diag) {
865     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
866     PLogObjectMemory(C,(mbs+1)*sizeof(int));
867     for ( i=0; i<mbs; i++ ) {
868       c->diag[i] = a->diag[i];
869     }
870   }
871   else c->diag          = 0;
872   c->nz                 = a->nz;
873   c->maxnz              = a->maxnz;
874   c->solve_work         = 0;
875   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
876   c->mult_work          = 0;
877   *B = C;
878   return 0;
879 }
880 
881 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
882 {
883   Mat_SeqBAIJ  *a;
884   Mat          B;
885   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
886   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
887   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
888   int          *masked, nmask,tmp,ishift,bs2;
889   Scalar       *aa;
890   MPI_Comm     comm = ((PetscObject) viewer)->comm;
891 
892   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
893   bs2  = bs*bs;
894 
895   MPI_Comm_size(comm,&size);
896   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
897   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
898   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
899   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
900   M = header[1]; N = header[2]; nz = header[3];
901 
902   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
903 
904   /*
905      This code adds extra rows to make sure the number of rows is
906     divisible by the blocksize
907   */
908   mbs        = M/bs;
909   extra_rows = bs - M + bs*(mbs);
910   if (extra_rows == bs) extra_rows = 0;
911   else                  mbs++;
912   if (extra_rows) {
913     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
914   }
915 
916   /* read in row lengths */
917   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
918   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
919   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
920 
921   /* read in column indices */
922   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
923   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
924   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
925 
926   /* loop over row lengths determining block row lengths */
927   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
928   PetscMemzero(browlengths,mbs*sizeof(int));
929   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
930   PetscMemzero(mask,mbs*sizeof(int));
931   masked = mask + mbs;
932   rowcount = 0; nzcount = 0;
933   for ( i=0; i<mbs; i++ ) {
934     nmask = 0;
935     for ( j=0; j<bs; j++ ) {
936       kmax = rowlengths[rowcount];
937       for ( k=0; k<kmax; k++ ) {
938         tmp = jj[nzcount++]/bs;
939         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
940       }
941       rowcount++;
942     }
943     browlengths[i] += nmask;
944     /* zero out the mask elements we set */
945     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
946   }
947 
948   /* create our matrix */
949   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
950          CHKERRQ(ierr);
951   B = *A;
952   a = (Mat_SeqBAIJ *) B->data;
953 
954   /* set matrix "i" values */
955   a->i[0] = 0;
956   for ( i=1; i<= mbs; i++ ) {
957     a->i[i]      = a->i[i-1] + browlengths[i-1];
958     a->ilen[i-1] = browlengths[i-1];
959   }
960   a->indexshift = 0;
961   a->nz         = 0;
962   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
963 
964   /* read in nonzero values */
965   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
966   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
967   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
968 
969   /* set "a" and "j" values into matrix */
970   nzcount = 0; jcount = 0;
971   for ( i=0; i<mbs; i++ ) {
972     nzcountb = nzcount;
973     nmask    = 0;
974     for ( j=0; j<bs; j++ ) {
975       kmax = rowlengths[i*bs+j];
976       for ( k=0; k<kmax; k++ ) {
977         tmp = jj[nzcount++]/bs;
978 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
979       }
980       rowcount++;
981     }
982     /* sort the masked values */
983     PetscSortInt(nmask,masked);
984 
985     /* set "j" values into matrix */
986     maskcount = 1;
987     for ( j=0; j<nmask; j++ ) {
988       a->j[jcount++]  = masked[j];
989       mask[masked[j]] = maskcount++;
990     }
991     /* set "a" values into matrix */
992     ishift = bs2*a->i[i];
993     for ( j=0; j<bs; j++ ) {
994       kmax = rowlengths[i*bs+j];
995       for ( k=0; k<kmax; k++ ) {
996         tmp    = jj[nzcountb]/bs ;
997         block  = mask[tmp] - 1;
998         point  = jj[nzcountb] - bs*tmp;
999         idx    = ishift + bs2*block + j + bs*point;
1000         a->a[idx] = aa[nzcountb++];
1001       }
1002     }
1003     /* zero out the mask elements we set */
1004     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1005   }
1006   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1007 
1008   PetscFree(rowlengths);
1009   PetscFree(browlengths);
1010   PetscFree(aa);
1011   PetscFree(jj);
1012   PetscFree(mask);
1013 
1014   B->assembled = PETSC_TRUE;
1015 
1016   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1017   if (flg) {
1018     Viewer tviewer;
1019     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1020     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
1021     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1022     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1023   }
1024   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1025   if (flg) {
1026     Viewer tviewer;
1027     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1028     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
1029     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1030     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1031   }
1032   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1033   if (flg) {
1034     Viewer tviewer;
1035     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1036     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1037     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1038   }
1039   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1040   if (flg) {
1041     Viewer tviewer;
1042     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1043     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
1044     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1045     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1046   }
1047   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1048   if (flg) {
1049     Viewer tviewer;
1050     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
1051     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
1052     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
1053     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1054   }
1055   return 0;
1056 }
1057 
1058 
1059 
1060