xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 9867e2074ee79691714c5c21789531d1a36e00f9)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.19 1996/03/26 18:56:10 bsmith Exp balay $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = a->indexshift;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45     CHKERRQ(ierr);
46     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
47     PetscFree(ia); PetscFree(ja);
48   } else {
49     if (ishift == oshift) {
50       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51     }
52     else if (ishift == -1) {
53       /* temporarily subtract 1 from i and j indices */
54       int nz = a->i[a->n] - 1;
55       for ( i=0; i<nz; i++ ) a->j[i]--;
56       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58       for ( i=0; i<nz; i++ ) a->j[i]++;
59       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60     } else {
61       /* temporarily add 1 to i and j indices */
62       int nz = a->i[a->n] - 1;
63       for ( i=0; i<nz; i++ ) a->j[i]++;
64       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66       for ( i=0; i<nz; i++ ) a->j[i]--;
67       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68     }
69   }
70   return 0;
71 }
72 
73 /*
74      Adds diagonal pointers to sparse matrix structure.
75 */
76 
77 int MatMarkDiag_SeqBAIJ(Mat A)
78 {
79   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80   int         i,j, *diag, m = a->mbs;
81 
82   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83   PLogObjectMemory(A,(m+1)*sizeof(int));
84   for ( i=0; i<m; i++ ) {
85     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86       if (a->j[j] == i) {
87         diag[i] = j;
88         break;
89       }
90     }
91   }
92   a->diag = diag;
93   return 0;
94 }
95 
96 #include "draw.h"
97 #include "pinclude/pviewer.h"
98 #include "sys.h"
99 
100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
101 {
102   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
103   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l;
104   Scalar      *aa;
105 
106   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
107   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
108   col_lens[0] = MAT_COOKIE;
109   col_lens[1] = a->m;
110   col_lens[2] = a->n;
111   col_lens[3] = a->nz*bs*bs;
112 
113   /* store lengths of each row and write (including header) to file */
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118     }
119   }
120   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
121   PetscFree(col_lens);
122 
123   /* store column indices (zero start index) */
124   jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj);
125   count = 0;
126   for ( i=0; i<a->mbs; i++ ) {
127     for ( j=0; j<bs; j++ ) {
128       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129         for ( l=0; l<bs; l++ ) {
130           jj[count++] = bs*a->j[k] + l;
131         }
132       }
133     }
134   }
135   ierr = PetscBinaryWrite(fd,jj,bs*bs*a->nz,BINARY_INT,0); CHKERRQ(ierr);
136   PetscFree(jj);
137 
138   /* store nonzero values */
139   aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa);
140   count = 0;
141   for ( i=0; i<a->mbs; i++ ) {
142     for ( j=0; j<bs; j++ ) {
143       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144         for ( l=0; l<bs; l++ ) {
145           aa[count++] = a->a[bs*bs*k + l*bs + j];
146         }
147       }
148     }
149   }
150   ierr = PetscBinaryWrite(fd,aa,bs*bs*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
151   PetscFree(aa);
152   return 0;
153 }
154 
155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
156 {
157   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
158   int         ierr, i,j,format,bs = a->bs,k,l;
159   FILE        *fd;
160   char        *outputname;
161 
162   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
163   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
164   ierr = ViewerGetFormat(viewer,&format);
165   if (format == ASCII_FORMAT_INFO) {
166     /* no need to print additional information */ ;
167   }
168   else if (format == ASCII_FORMAT_MATLAB) {
169     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
170   }
171   else {
172     for ( i=0; i<a->mbs; i++ ) {
173       for ( j=0; j<bs; j++ ) {
174         fprintf(fd,"row %d:",i*bs+j);
175         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
176           for ( l=0; l<bs; l++ ) {
177 #if defined(PETSC_COMPLEX)
178           if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) {
179             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
180               real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j]));
181           }
182           else {
183             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j]));
184           }
185 #else
186             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]);
187 #endif
188           }
189         }
190         fprintf(fd,"\n");
191       }
192     }
193   }
194   fflush(fd);
195   return 0;
196 }
197 
198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
199 {
200   Mat         A = (Mat) obj;
201   ViewerType  vtype;
202   int         ierr;
203 
204   if (!viewer) {
205     viewer = STDOUT_VIEWER_SELF;
206   }
207 
208   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
209   if (vtype == MATLAB_VIEWER) {
210     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
211   }
212   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
213     return MatView_SeqBAIJ_ASCII(A,viewer);
214   }
215   else if (vtype == BINARY_FILE_VIEWER) {
216     return MatView_SeqBAIJ_Binary(A,viewer);
217   }
218   else if (vtype == DRAW_VIEWER) {
219     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
220   }
221   return 0;
222 }
223 
224 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
225 {
226   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
227   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i;
228   Scalar      *aa,*v_i,*aa_i;
229 
230   bs = a->bs;
231   ai = a->i;
232   aj = a->j;
233   aa = a->a;
234 
235   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
236 
237   bn  = row/bs;   /* Block number */
238   bp  = row % bs; /* Block Position */
239   M   = ai[bn+1] - ai[bn];
240   *nz = bs*M;
241 
242   if (v) {
243     *v = 0;
244     if (*nz) {
245       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
246       for ( i=0; i<M; i++ ) { /* for each block in the block row */
247         v_i  = *v + i*bs;
248         aa_i = aa + bs*bs*(ai[bn] + i);
249         for ( j=bp,k=0; j<bs*bs; j+=bs,k++ ) {v_i[k] = aa_i[j];}
250       }
251     }
252   }
253 
254   if (idx) {
255     *idx = 0;
256     if (*nz) {
257       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
258       for ( i=0; i<M; i++ ) { /* for each block in the block row */
259         idx_i = *idx + i*bs;
260         itmp  = bs*aj[ai[bn] + i];
261         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
262       }
263     }
264   }
265   return 0;
266 }
267 
268 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
269 {
270   if (idx) {if (*idx) PetscFree(*idx);}
271   if (v)   {if (*v)   PetscFree(*v);}
272   return 0;
273 }
274 
275 static int MatZeroEntries_SeqBAIJ(Mat A)
276 {
277   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
278   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
279   return 0;
280 }
281 
282 int MatDestroy_SeqBAIJ(PetscObject obj)
283 {
284   Mat        A  = (Mat) obj;
285   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
286 
287 #if defined(PETSC_LOG)
288   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
289 #endif
290   PetscFree(a->a);
291   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
292   if (a->diag) PetscFree(a->diag);
293   if (a->ilen) PetscFree(a->ilen);
294   if (a->imax) PetscFree(a->imax);
295   if (a->solve_work) PetscFree(a->solve_work);
296   if (a->mult_work) PetscFree(a->mult_work);
297   PetscFree(a);
298   return 0;
299 }
300 
301 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
302 {
303   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
304   if      (op == ROW_ORIENTED)              a->roworiented = 1;
305   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
306   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
307   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
308   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
309   else if (op == ROWS_SORTED ||
310            op == SYMMETRIC_MATRIX ||
311            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
312            op == YES_NEW_DIAGONALS)
313     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
314   else if (op == NO_NEW_DIAGONALS)
315     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
316   else
317     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
318   return 0;
319 }
320 
321 
322 /* -------------------------------------------------------*/
323 /* Should check that shapes of vectors and matrices match */
324 /* -------------------------------------------------------*/
325 #include "pinclude/plapack.h"
326 
327 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
328 {
329   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
330   Scalar          *xg,*yg;
331   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
332   register Scalar x1,x2,x3,x4,x5;
333   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
334   int             bs = a->bs,j,n,bs2 = bs*bs;
335 
336   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
337   PetscMemzero(y,m*sizeof(Scalar));
338   x     = x;
339   idx   = a->j;
340   v     = a->a;
341   ii    = a->i;
342 
343   switch (bs) {
344     case 1:
345      for ( i=0; i<m; i++ ) {
346        n    = ii[1] - ii[0]; ii++;
347        sum  = 0.0;
348        while (n--) sum += *v++ * x[*idx++];
349        y[i] = sum;
350       }
351       break;
352     case 2:
353       for ( i=0; i<mbs; i++ ) {
354         n  = ii[1] - ii[0]; ii++;
355         sum1 = 0.0; sum2 = 0.0;
356         for ( j=0; j<n; j++ ) {
357           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
358           sum1 += v[0]*x1 + v[2]*x2;
359           sum2 += v[1]*x1 + v[3]*x2;
360           v += 4;
361         }
362         y[0] += sum1; y[1] += sum2;
363         y += 2;
364       }
365       break;
366     case 3:
367       for ( i=0; i<mbs; i++ ) {
368         n  = ii[1] - ii[0]; ii++;
369         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
370         for ( j=0; j<n; j++ ) {
371           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
372           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
373           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
374           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
375           v += 9;
376         }
377         y[0] += sum1; y[1] += sum2; y[2] += sum3;
378         y += 3;
379       }
380       break;
381     case 4:
382       for ( i=0; i<mbs; i++ ) {
383         n  = ii[1] - ii[0]; ii++;
384         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
385         for ( j=0; j<n; j++ ) {
386           xb = x + 4*(*idx++);
387           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
388           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
389           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
390           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
391           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
392           v += 16;
393         }
394         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
395         y += 4;
396       }
397       break;
398     case 5:
399       for ( i=0; i<mbs; i++ ) {
400         n  = ii[1] - ii[0]; ii++;
401         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
402         for ( j=0; j<n; j++ ) {
403           xb = x + 5*(*idx++);
404           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
405           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
406           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
407           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
408           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
409           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
410           v += 25;
411         }
412         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
413         y += 5;
414       }
415       break;
416       /* block sizes larger then 5 by 5 are handled by BLAS */
417     default: {
418       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
419       if (!a->mult_work) {
420         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
421         CHKPTRQ(a->mult_work);
422       }
423       work = a->mult_work;
424       for ( i=0; i<mbs; i++ ) {
425         n     = ii[1] - ii[0]; ii++;
426         ncols = n*bs;
427         workt = work;
428         for ( j=0; j<n; j++ ) {
429           xb = x + bs*(*idx++);
430           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
431           workt += bs;
432         }
433         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
434         v += n*bs2;
435         y += bs;
436       }
437     }
438   }
439   PLogFlops(2*bs2*a->nz - m);
440   return 0;
441 }
442 
443 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
444 {
445   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
446   if (nz)  *nz  = a->bs*a->bs*a->nz;
447   if (nza) *nza = a->maxnz;
448   if (mem) *mem = (int)A->mem;
449   return 0;
450 }
451 
452 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
453 {
454   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
455 
456   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
457 
458   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
459   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
460       (a->nz != b->nz)||(a->indexshift != b->indexshift)) {
461     *flg = PETSC_FALSE; return 0;
462   }
463 
464   /* if the a->i are the same */
465   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
466     *flg = PETSC_FALSE; return 0;
467   }
468 
469   /* if a->j are the same */
470   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
471     *flg = PETSC_FALSE; return 0;
472   }
473 
474   /* if a->a are the same */
475   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
476     *flg = PETSC_FALSE; return 0;
477   }
478   *flg = PETSC_TRUE;
479   return 0;
480 
481 }
482 
483 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
484 {
485   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
486   int         i,j,k,n,row,bs,*ai,*aj,ambs;
487   Scalar      *x, zero = 0.0,*aa,*aa_j;
488 
489   bs  = a->bs;
490   aa   = a->a;
491   ai   = a->i;
492   aj   = a->j;
493   ambs = a->mbs;
494 
495   VecSet(&zero,v);
496   VecGetArray(v,&x); VecGetLocalSize(v,&n);
497   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
498   for ( i=0; i<ambs; i++ ) {
499     for ( j=ai[i]; j<ai[i+1]; j++ ) {
500       if (aj[j] == i) {
501         row  = i*bs;
502         aa_j = aa+j*bs*bs;
503         for (k=0; k<bs*bs; k+=(bs+1),row++) x[row] = aa_j[k];
504         break;
505       }
506     }
507   }
508   return 0;
509 }
510 
511 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
512 {
513   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
514   Scalar      *l,*r,x,*v,*aa,*li,*ri;
515   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs;
516 
517   ai  = a->i;
518   aj  = a->j;
519   aa  = a->a;
520   m   = a->m;
521   n   = a->n;
522   bs  = a->bs;
523   mbs = a->mbs;
524 
525   if (ll) {
526     VecGetArray(ll,&l); VecGetSize(ll,&lm);
527     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
528     for ( i=0; i<mbs; i++ ) { /* for each block row */
529       M  = ai[i+1] - ai[i];
530       li = l + i*bs;
531       v  = aa + bs*bs*ai[i];
532       for ( j=0; j<M; j++ ) { /* for each block */
533         for ( k=0; k<bs*bs; k++ ) {
534           (*v++) *= li[k%bs];
535         }
536       }
537     }
538   }
539 
540   if (rr) {
541     VecGetArray(rr,&r); VecGetSize(rr,&rn);
542     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
543     for ( i=0; i<mbs; i++ ) { /* for each block row */
544       M  = ai[i+1] - ai[i];
545       v  = aa + bs*bs*ai[i];
546       for ( j=0; j<M; j++ ) { /* for each block */
547         ri = r + bs*aj[ai[i]+j];
548         for ( k=0; k<bs; k++ ) {
549           x = ri[k];
550           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
551         }
552       }
553     }
554   }
555   return 0;
556 }
557 
558 
559 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
560 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
561 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
562 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
563 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
564 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
565 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
566 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
567 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
568 
569 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
570 {
571   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
572   *m = a->m; *n = a->n;
573   return 0;
574 }
575 
576 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
577 {
578   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
579   *m = 0; *n = a->m;
580   return 0;
581 }
582 
583 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
584 {
585   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
586   Scalar      *v = a->a;
587   double      sum = 0.0;
588   int         i;
589 
590   if (type == NORM_FROBENIUS) {
591     for (i=0; i<a->nz; i++ ) {
592 #if defined(PETSC_COMPLEX)
593       sum += real(conj(*v)*(*v)); v++;
594 #else
595       sum += (*v)*(*v); v++;
596 #endif
597     }
598     *norm = sqrt(sum);
599   }
600   else {
601     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
602   }
603   return 0;
604 }
605 
606 /*
607      note: This can only work for identity for row and col. It would
608    be good to check this and otherwise generate an error.
609 */
610 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
611 {
612   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
613   Mat         outA;
614   int         ierr;
615 
616   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
617 
618   outA          = inA;
619   inA->factor   = FACTOR_LU;
620   a->row        = row;
621   a->col        = col;
622 
623   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
624 
625   if (!a->diag) {
626     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
627   }
628   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
629   return 0;
630 }
631 
632 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
633 {
634   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
635   int         one = 1, totalnz = a->bs*a->bs*a->nz;
636   BLscal_( &totalnz, alpha, a->a, &one );
637   PLogFlops(totalnz);
638   return 0;
639 }
640 
641 int MatPrintHelp_SeqBAIJ(Mat A)
642 {
643   static int called = 0;
644 
645   if (called) return 0; else called = 1;
646   return 0;
647 }
648 /* -------------------------------------------------------------------*/
649 static struct _MatOps MatOps = {0,
650        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
651        MatMult_SeqBAIJ,0,
652        0,0,
653        MatSolve_SeqBAIJ,0,
654        0,0,
655        MatLUFactor_SeqBAIJ,0,
656        0,
657        0,
658        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
659        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
660        0,0,
661        0,
662        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
663        MatGetReordering_SeqBAIJ,
664        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
665        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
666        MatILUFactorSymbolic_SeqBAIJ,0,
667        0,0,/*  MatConvert_SeqBAIJ  */ 0,
668        0,0,
669        MatConvertSameType_SeqBAIJ,0,0,
670        MatILUFactor_SeqBAIJ,0,0,
671        0,0,
672        0,0,
673        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
674        0};
675 
676 /*@C
677    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
678    (the default parallel PETSc format).  For good matrix assembly performance
679    the user should preallocate the matrix storage by setting the parameter nz
680    (or nzz).  By setting these parameters accurately, performance can be
681    increased by more than a factor of 50.
682 
683    Input Parameters:
684 .  comm - MPI communicator, set to MPI_COMM_SELF
685 .  bs - size of block
686 .  m - number of rows
687 .  n - number of columns
688 .  nz - number of block nonzeros per block row (same for all rows)
689 .  nzz - number of block nonzeros per block row or PETSC_NULL
690          (possibly different for each row)
691 
692    Output Parameter:
693 .  A - the matrix
694 
695    Notes:
696    The BAIJ format, is fully compatible with standard Fortran 77
697    storage.  That is, the stored row and column indices can begin at
698    either one (as in Fortran) or zero.  See the users manual for details.
699 
700    Specify the preallocated storage with either nz or nnz (not both).
701    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
702    allocation.  For additional details, see the users manual chapter on
703    matrices and the file $(PETSC_DIR)/Performance.
704 
705 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
706 @*/
707 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
708 {
709   Mat         B;
710   Mat_SeqBAIJ *b;
711   int         i,len,ierr, flg,mbs = m/bs;
712 
713   if (mbs*bs != m)
714     SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize");
715 
716   *A      = 0;
717   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
718   PLogObjectCreate(B);
719   B->data             = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
720   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
721   switch (bs) {
722     case 1:
723       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
724       break;
725     case 2:
726       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
727       break;
728     case 3:
729       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
730       break;
731     case 4:
732       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
733       break;
734     case 5:
735       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
736       break;
737   }
738   B->destroy          = MatDestroy_SeqBAIJ;
739   B->view             = MatView_SeqBAIJ;
740   B->factor           = 0;
741   B->lupivotthreshold = 1.0;
742   b->row              = 0;
743   b->col              = 0;
744   b->reallocs         = 0;
745 
746   b->m       = m;
747   b->mbs     = mbs;
748   b->n       = n;
749   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
750   if (nnz == PETSC_NULL) {
751     if (nz == PETSC_DEFAULT) nz = 5;
752     else if (nz <= 0)        nz = 1;
753     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
754     nz = nz*mbs;
755   }
756   else {
757     nz = 0;
758     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
759   }
760 
761   /* allocate the matrix space */
762   len   = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int);
763   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
764   PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar));
765   b->j  = (int *) (b->a + nz*bs*bs);
766   PetscMemzero(b->j,nz*sizeof(int));
767   b->i  = b->j + nz;
768   b->singlemalloc = 1;
769 
770   b->i[0] = 0;
771   for (i=1; i<mbs+1; i++) {
772     b->i[i] = b->i[i-1] + b->imax[i-1];
773   }
774 
775   /* b->ilen will count nonzeros in each block row so far. */
776   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
777   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
778   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
779 
780   b->bs               = bs;
781   b->mbs              = mbs;
782   b->nz               = 0;
783   b->maxnz            = nz;
784   b->sorted           = 0;
785   b->roworiented      = 1;
786   b->nonew            = 0;
787   b->diag             = 0;
788   b->solve_work       = 0;
789   b->mult_work        = 0;
790   b->spptr            = 0;
791 
792   *A = B;
793   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
794   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
795   return 0;
796 }
797 
798 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
799 {
800   Mat         C;
801   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
802   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz;
803 
804   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
805 
806   *B = 0;
807   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
808   PLogObjectCreate(C);
809   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
810   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
811   C->destroy    = MatDestroy_SeqBAIJ;
812   C->view       = MatView_SeqBAIJ;
813   C->factor     = A->factor;
814   c->row        = 0;
815   c->col        = 0;
816   C->assembled  = PETSC_TRUE;
817 
818   c->m          = a->m;
819   c->n          = a->n;
820   c->bs         = a->bs;
821   c->mbs        = a->mbs;
822   c->nbs        = a->nbs;
823 
824   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
825   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
826   for ( i=0; i<mbs; i++ ) {
827     c->imax[i] = a->imax[i];
828     c->ilen[i] = a->ilen[i];
829   }
830 
831   /* allocate the matrix space */
832   c->singlemalloc = 1;
833   len   = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int));
834   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
835   c->j  = (int *) (c->a + nz*bs*bs);
836   c->i  = c->j + nz;
837   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
838   if (mbs > 0) {
839     PetscMemcpy(c->j,a->j,nz*sizeof(int));
840     if (cpvalues == COPY_VALUES) {
841       PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar));
842     }
843   }
844 
845   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
846   c->sorted      = a->sorted;
847   c->roworiented = a->roworiented;
848   c->nonew       = a->nonew;
849 
850   if (a->diag) {
851     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
852     PLogObjectMemory(C,(mbs+1)*sizeof(int));
853     for ( i=0; i<mbs; i++ ) {
854       c->diag[i] = a->diag[i];
855     }
856   }
857   else c->diag          = 0;
858   c->nz                 = a->nz;
859   c->maxnz              = a->maxnz;
860   c->solve_work         = 0;
861   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
862   c->mult_work          = 0;
863   *B = C;
864   return 0;
865 }
866 
867 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
868 {
869   Mat_SeqBAIJ  *a;
870   Mat          B;
871   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
872   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
873   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
874   int          *masked, nmask,tmp,ishift,bs2;
875   Scalar       *aa;
876   MPI_Comm     comm = ((PetscObject) viewer)->comm;
877 
878   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
879   bs2  = bs*bs;
880 
881   MPI_Comm_size(comm,&size);
882   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
883   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
884   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
885   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
886   M = header[1]; N = header[2]; nz = header[3];
887 
888   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
889 
890   /*
891      This code adds extra rows to make sure the number of rows is
892     divisible by the blocksize
893   */
894   mbs        = M/bs;
895   extra_rows = bs - M + bs*(mbs);
896   if (extra_rows == bs) extra_rows = 0;
897   else                  mbs++;
898   if (extra_rows) {
899     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
900   }
901 
902   /* read in row lengths */
903   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
904   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
905   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
906 
907   /* read in column indices */
908   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
909   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
910   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
911 
912   /* loop over row lengths determining block row lengths */
913   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
914   PetscMemzero(browlengths,mbs*sizeof(int));
915   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
916   PetscMemzero(mask,mbs*sizeof(int));
917   masked = mask + mbs;
918   rowcount = 0; nzcount = 0;
919   for ( i=0; i<mbs; i++ ) {
920     nmask = 0;
921     for ( j=0; j<bs; j++ ) {
922       kmax = rowlengths[rowcount];
923       for ( k=0; k<kmax; k++ ) {
924         tmp = jj[nzcount++]/bs;
925         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
926       }
927       rowcount++;
928     }
929     browlengths[i] += nmask;
930     /* zero out the mask elements we set */
931     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
932   }
933 
934   /* create our matrix */
935   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
936          CHKERRQ(ierr);
937   B = *A;
938   a = (Mat_SeqBAIJ *) B->data;
939 
940   /* set matrix "i" values */
941   a->i[0] = 0;
942   for ( i=1; i<= mbs; i++ ) {
943     a->i[i]      = a->i[i-1] + browlengths[i-1];
944     a->ilen[i-1] = browlengths[i-1];
945   }
946   a->indexshift = 0;
947   a->nz         = 0;
948   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
949 
950   /* read in nonzero values */
951   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
952   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
953   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
954 
955   /* set "a" and "j" values into matrix */
956   nzcount = 0; jcount = 0;
957   for ( i=0; i<mbs; i++ ) {
958     nzcountb = nzcount;
959     nmask    = 0;
960     for ( j=0; j<bs; j++ ) {
961       kmax = rowlengths[i*bs+j];
962       for ( k=0; k<kmax; k++ ) {
963         tmp = jj[nzcount++]/bs;
964 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
965       }
966       rowcount++;
967     }
968     /* sort the masked values */
969     PetscSortInt(nmask,masked);
970 
971     /* set "j" values into matrix */
972     maskcount = 1;
973     for ( j=0; j<nmask; j++ ) {
974       a->j[jcount++]  = masked[j];
975       mask[masked[j]] = maskcount++;
976     }
977     /* set "a" values into matrix */
978     ishift = bs2*a->i[i];
979     for ( j=0; j<bs; j++ ) {
980       kmax = rowlengths[i*bs+j];
981       for ( k=0; k<kmax; k++ ) {
982         tmp    = jj[nzcountb]/bs ;
983         block  = mask[tmp] - 1;
984         point  = jj[nzcountb] - bs*tmp;
985         idx    = ishift + bs2*block + j + bs*point;
986         a->a[idx] = aa[nzcountb++];
987       }
988     }
989     /* zero out the mask elements we set */
990     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
991   }
992   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
993 
994   PetscFree(rowlengths);
995   PetscFree(browlengths);
996   PetscFree(aa);
997   PetscFree(jj);
998   PetscFree(mask);
999 
1000   B->assembled = PETSC_TRUE;
1001 
1002   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1003   if (flg) {
1004     Viewer tviewer;
1005     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1006     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
1007     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1008     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1009   }
1010   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1011   if (flg) {
1012     Viewer tviewer;
1013     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1014     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
1015     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1016     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1017   }
1018   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1019   if (flg) {
1020     Viewer tviewer;
1021     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1022     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1023     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1024   }
1025   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1026   if (flg) {
1027     Viewer tviewer;
1028     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1029     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
1030     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1031     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1032   }
1033   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1034   if (flg) {
1035     Viewer tviewer;
1036     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
1037     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
1038     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
1039     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1040   }
1041   return 0;
1042 }
1043 
1044 
1045 
1046