xref: /petsc/src/mat/impls/baij/seq/baij.c (revision d225ffd7f2a7ed8915dc34d48b00cc8b83dea371)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.21 1996/03/27 18:59:56 balay Exp curfman $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = a->indexshift;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45     CHKERRQ(ierr);
46     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
47     PetscFree(ia); PetscFree(ja);
48   } else {
49     if (ishift == oshift) {
50       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51     }
52     else if (ishift == -1) {
53       /* temporarily subtract 1 from i and j indices */
54       int nz = a->i[a->n] - 1;
55       for ( i=0; i<nz; i++ ) a->j[i]--;
56       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58       for ( i=0; i<nz; i++ ) a->j[i]++;
59       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60     } else {
61       /* temporarily add 1 to i and j indices */
62       int nz = a->i[a->n] - 1;
63       for ( i=0; i<nz; i++ ) a->j[i]++;
64       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66       for ( i=0; i<nz; i++ ) a->j[i]--;
67       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68     }
69   }
70   return 0;
71 }
72 
73 /*
74      Adds diagonal pointers to sparse matrix structure.
75 */
76 
77 int MatMarkDiag_SeqBAIJ(Mat A)
78 {
79   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80   int         i,j, *diag, m = a->mbs;
81 
82   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83   PLogObjectMemory(A,(m+1)*sizeof(int));
84   for ( i=0; i<m; i++ ) {
85     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86       if (a->j[j] == i) {
87         diag[i] = j;
88         break;
89       }
90     }
91   }
92   a->diag = diag;
93   return 0;
94 }
95 
96 #include "draw.h"
97 #include "pinclude/pviewer.h"
98 #include "sys.h"
99 
100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
101 {
102   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
103   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l;
104   Scalar      *aa;
105 
106   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
107   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
108   col_lens[0] = MAT_COOKIE;
109   col_lens[1] = a->m;
110   col_lens[2] = a->n;
111   col_lens[3] = a->nz*bs*bs;
112 
113   /* store lengths of each row and write (including header) to file */
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118     }
119   }
120   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
121   PetscFree(col_lens);
122 
123   /* store column indices (zero start index) */
124   jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj);
125   count = 0;
126   for ( i=0; i<a->mbs; i++ ) {
127     for ( j=0; j<bs; j++ ) {
128       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129         for ( l=0; l<bs; l++ ) {
130           jj[count++] = bs*a->j[k] + l;
131         }
132       }
133     }
134   }
135   ierr = PetscBinaryWrite(fd,jj,bs*bs*a->nz,BINARY_INT,0); CHKERRQ(ierr);
136   PetscFree(jj);
137 
138   /* store nonzero values */
139   aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa);
140   count = 0;
141   for ( i=0; i<a->mbs; i++ ) {
142     for ( j=0; j<bs; j++ ) {
143       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144         for ( l=0; l<bs; l++ ) {
145           aa[count++] = a->a[bs*bs*k + l*bs + j];
146         }
147       }
148     }
149   }
150   ierr = PetscBinaryWrite(fd,aa,bs*bs*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
151   PetscFree(aa);
152   return 0;
153 }
154 
155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
156 {
157   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
158   int         ierr, i,j,format,bs = a->bs,k,l;
159   FILE        *fd;
160   char        *outputname;
161 
162   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
163   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
164   ierr = ViewerGetFormat(viewer,&format);
165   if (format == ASCII_FORMAT_INFO) {
166     /* no need to print additional information */ ;
167   }
168   else if (format == ASCII_FORMAT_MATLAB) {
169     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
170   }
171   else {
172     for ( i=0; i<a->mbs; i++ ) {
173       for ( j=0; j<bs; j++ ) {
174         fprintf(fd,"row %d:",i*bs+j);
175         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
176           for ( l=0; l<bs; l++ ) {
177 #if defined(PETSC_COMPLEX)
178           if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) {
179             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
180               real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j]));
181           }
182           else {
183             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j]));
184           }
185 #else
186             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]);
187 #endif
188           }
189         }
190         fprintf(fd,"\n");
191       }
192     }
193   }
194   fflush(fd);
195   return 0;
196 }
197 
198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
199 {
200   Mat         A = (Mat) obj;
201   ViewerType  vtype;
202   int         ierr;
203 
204   if (!viewer) {
205     viewer = STDOUT_VIEWER_SELF;
206   }
207 
208   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
209   if (vtype == MATLAB_VIEWER) {
210     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
211   }
212   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
213     return MatView_SeqBAIJ_ASCII(A,viewer);
214   }
215   else if (vtype == BINARY_FILE_VIEWER) {
216     return MatView_SeqBAIJ_Binary(A,viewer);
217   }
218   else if (vtype == DRAW_VIEWER) {
219     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
220   }
221   return 0;
222 }
223 
224 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
225 {
226   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
227   *m = a->m; *n = a->n;
228   return 0;
229 }
230 
231 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
232 {
233   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
234   *m = 0; *n = a->m;
235   return 0;
236 }
237 
238 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
239 {
240   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
241   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i;
242   Scalar      *aa,*v_i,*aa_i;
243 
244   bs = a->bs;
245   ai = a->i;
246   aj = a->j;
247   aa = a->a;
248 
249   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
250 
251   bn  = row/bs;   /* Block number */
252   bp  = row % bs; /* Block Position */
253   M   = ai[bn+1] - ai[bn];
254   *nz = bs*M;
255 
256   if (v) {
257     *v = 0;
258     if (*nz) {
259       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
260       for ( i=0; i<M; i++ ) { /* for each block in the block row */
261         v_i  = *v + i*bs;
262         aa_i = aa + bs*bs*(ai[bn] + i);
263         for ( j=bp,k=0; j<bs*bs; j+=bs,k++ ) {v_i[k] = aa_i[j];}
264       }
265     }
266   }
267 
268   if (idx) {
269     *idx = 0;
270     if (*nz) {
271       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
272       for ( i=0; i<M; i++ ) { /* for each block in the block row */
273         idx_i = *idx + i*bs;
274         itmp  = bs*aj[ai[bn] + i];
275         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
276       }
277     }
278   }
279   return 0;
280 }
281 
282 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
283 {
284   if (idx) {if (*idx) PetscFree(*idx);}
285   if (v)   {if (*v)   PetscFree(*v);}
286   return 0;
287 }
288 
289 static int MatZeroEntries_SeqBAIJ(Mat A)
290 {
291   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
292   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
293   return 0;
294 }
295 
296 int MatDestroy_SeqBAIJ(PetscObject obj)
297 {
298   Mat        A  = (Mat) obj;
299   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
300 
301 #if defined(PETSC_LOG)
302   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
303 #endif
304   PetscFree(a->a);
305   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
306   if (a->diag) PetscFree(a->diag);
307   if (a->ilen) PetscFree(a->ilen);
308   if (a->imax) PetscFree(a->imax);
309   if (a->solve_work) PetscFree(a->solve_work);
310   if (a->mult_work) PetscFree(a->mult_work);
311   PetscFree(a);
312   return 0;
313 }
314 
315 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
316 {
317   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
318   if      (op == ROW_ORIENTED)              a->roworiented = 1;
319   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
320   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
321   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
322   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
323   else if (op == ROWS_SORTED ||
324            op == SYMMETRIC_MATRIX ||
325            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
326            op == YES_NEW_DIAGONALS)
327     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
328   else if (op == NO_NEW_DIAGONALS)
329     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
330   else
331     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
332   return 0;
333 }
334 
335 
336 /* -------------------------------------------------------*/
337 /* Should check that shapes of vectors and matrices match */
338 /* -------------------------------------------------------*/
339 #include "pinclude/plapack.h"
340 
341 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
342 {
343   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
344   Scalar          *xg,*yg;
345   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
346   register Scalar x1,x2,x3,x4,x5;
347   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
348   int             bs = a->bs,j,n,bs2 = bs*bs;
349 
350   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
351   PetscMemzero(y,m*sizeof(Scalar));
352   x     = x;
353   idx   = a->j;
354   v     = a->a;
355   ii    = a->i;
356 
357   switch (bs) {
358     case 1:
359      for ( i=0; i<m; i++ ) {
360        n    = ii[1] - ii[0]; ii++;
361        sum  = 0.0;
362        while (n--) sum += *v++ * x[*idx++];
363        y[i] = sum;
364       }
365       break;
366     case 2:
367       for ( i=0; i<mbs; i++ ) {
368         n  = ii[1] - ii[0]; ii++;
369         sum1 = 0.0; sum2 = 0.0;
370         for ( j=0; j<n; j++ ) {
371           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
372           sum1 += v[0]*x1 + v[2]*x2;
373           sum2 += v[1]*x1 + v[3]*x2;
374           v += 4;
375         }
376         y[0] += sum1; y[1] += sum2;
377         y += 2;
378       }
379       break;
380     case 3:
381       for ( i=0; i<mbs; i++ ) {
382         n  = ii[1] - ii[0]; ii++;
383         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
384         for ( j=0; j<n; j++ ) {
385           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
386           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
387           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
388           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
389           v += 9;
390         }
391         y[0] += sum1; y[1] += sum2; y[2] += sum3;
392         y += 3;
393       }
394       break;
395     case 4:
396       for ( i=0; i<mbs; i++ ) {
397         n  = ii[1] - ii[0]; ii++;
398         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
399         for ( j=0; j<n; j++ ) {
400           xb = x + 4*(*idx++);
401           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
402           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
403           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
404           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
405           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
406           v += 16;
407         }
408         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
409         y += 4;
410       }
411       break;
412     case 5:
413       for ( i=0; i<mbs; i++ ) {
414         n  = ii[1] - ii[0]; ii++;
415         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
416         for ( j=0; j<n; j++ ) {
417           xb = x + 5*(*idx++);
418           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
419           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
420           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
421           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
422           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
423           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
424           v += 25;
425         }
426         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
427         y += 5;
428       }
429       break;
430       /* block sizes larger then 5 by 5 are handled by BLAS */
431     default: {
432       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
433       if (!a->mult_work) {
434         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
435         CHKPTRQ(a->mult_work);
436       }
437       work = a->mult_work;
438       for ( i=0; i<mbs; i++ ) {
439         n     = ii[1] - ii[0]; ii++;
440         ncols = n*bs;
441         workt = work;
442         for ( j=0; j<n; j++ ) {
443           xb = x + bs*(*idx++);
444           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
445           workt += bs;
446         }
447         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
448         v += n*bs2;
449         y += bs;
450       }
451     }
452   }
453   PLogFlops(2*bs2*a->nz - m);
454   return 0;
455 }
456 
457 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
458 {
459   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
460   if (nz)  *nz  = a->bs*a->bs*a->nz;
461   if (nza) *nza = a->maxnz;
462   if (mem) *mem = (int)A->mem;
463   return 0;
464 }
465 
466 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
467 {
468   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
469 
470   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
471 
472   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
473   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
474       (a->nz != b->nz)||(a->indexshift != b->indexshift)) {
475     *flg = PETSC_FALSE; return 0;
476   }
477 
478   /* if the a->i are the same */
479   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
480     *flg = PETSC_FALSE; return 0;
481   }
482 
483   /* if a->j are the same */
484   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
485     *flg = PETSC_FALSE; return 0;
486   }
487 
488   /* if a->a are the same */
489   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
490     *flg = PETSC_FALSE; return 0;
491   }
492   *flg = PETSC_TRUE;
493   return 0;
494 
495 }
496 
497 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
498 {
499   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
500   int         i,j,k,n,row,bs,*ai,*aj,ambs;
501   Scalar      *x, zero = 0.0,*aa,*aa_j;
502 
503   bs  = a->bs;
504   aa   = a->a;
505   ai   = a->i;
506   aj   = a->j;
507   ambs = a->mbs;
508 
509   VecSet(&zero,v);
510   VecGetArray(v,&x); VecGetLocalSize(v,&n);
511   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
512   for ( i=0; i<ambs; i++ ) {
513     for ( j=ai[i]; j<ai[i+1]; j++ ) {
514       if (aj[j] == i) {
515         row  = i*bs;
516         aa_j = aa+j*bs*bs;
517         for (k=0; k<bs*bs; k+=(bs+1),row++) x[row] = aa_j[k];
518         break;
519       }
520     }
521   }
522   return 0;
523 }
524 
525 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
526 {
527   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
528   Scalar      *l,*r,x,*v,*aa,*li,*ri;
529   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs;
530 
531   ai  = a->i;
532   aj  = a->j;
533   aa  = a->a;
534   m   = a->m;
535   n   = a->n;
536   bs  = a->bs;
537   mbs = a->mbs;
538 
539   if (ll) {
540     VecGetArray(ll,&l); VecGetSize(ll,&lm);
541     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
542     for ( i=0; i<mbs; i++ ) { /* for each block row */
543       M  = ai[i+1] - ai[i];
544       li = l + i*bs;
545       v  = aa + bs*bs*ai[i];
546       for ( j=0; j<M; j++ ) { /* for each block */
547         for ( k=0; k<bs*bs; k++ ) {
548           (*v++) *= li[k%bs];
549         }
550       }
551     }
552   }
553 
554   if (rr) {
555     VecGetArray(rr,&r); VecGetSize(rr,&rn);
556     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
557     for ( i=0; i<mbs; i++ ) { /* for each block row */
558       M  = ai[i+1] - ai[i];
559       v  = aa + bs*bs*ai[i];
560       for ( j=0; j<M; j++ ) { /* for each block */
561         ri = r + bs*aj[ai[i]+j];
562         for ( k=0; k<bs; k++ ) {
563           x = ri[k];
564           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
565         }
566       }
567     }
568   }
569   return 0;
570 }
571 
572 
573 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
574 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
575 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
576 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
577 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
578 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
579 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
580 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
581 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
582 
583 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
584 {
585   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
586   Scalar      *v = a->a;
587   double      sum = 0.0;
588   int         i;
589 
590   if (type == NORM_FROBENIUS) {
591     for (i=0; i<a->nz; i++ ) {
592 #if defined(PETSC_COMPLEX)
593       sum += real(conj(*v)*(*v)); v++;
594 #else
595       sum += (*v)*(*v); v++;
596 #endif
597     }
598     *norm = sqrt(sum);
599   }
600   else {
601     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
602   }
603   return 0;
604 }
605 
606 /*
607      note: This can only work for identity for row and col. It would
608    be good to check this and otherwise generate an error.
609 */
610 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
611 {
612   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
613   Mat         outA;
614   int         ierr;
615 
616   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
617 
618   outA          = inA;
619   inA->factor   = FACTOR_LU;
620   a->row        = row;
621   a->col        = col;
622 
623   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
624 
625   if (!a->diag) {
626     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
627   }
628   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
629   return 0;
630 }
631 
632 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
633 {
634   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
635   int         one = 1, totalnz = a->bs*a->bs*a->nz;
636   BLscal_( &totalnz, alpha, a->a, &one );
637   PLogFlops(totalnz);
638   return 0;
639 }
640 
641 int MatPrintHelp_SeqBAIJ(Mat A)
642 {
643   static int called = 0;
644 
645   if (called) return 0; else called = 1;
646   return 0;
647 }
648 /* -------------------------------------------------------------------*/
649 static struct _MatOps MatOps = {0,
650        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
651        MatMult_SeqBAIJ,0,
652        0,0,
653        MatSolve_SeqBAIJ,0,
654        0,0,
655        MatLUFactor_SeqBAIJ,0,
656        0,
657        0,
658        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
659        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
660        0,0,
661        0,
662        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
663        MatGetReordering_SeqBAIJ,
664        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
665        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
666        MatILUFactorSymbolic_SeqBAIJ,0,
667        0,0,/*  MatConvert_SeqBAIJ  */ 0,
668        0,0,
669        MatConvertSameType_SeqBAIJ,0,0,
670        MatILUFactor_SeqBAIJ,0,0,
671        0,0,
672        0,0,
673        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
674        0};
675 
676 /*@C
677    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
678    (the default parallel PETSc format).  For good matrix assembly performance
679    the user should preallocate the matrix storage by setting the parameter nz
680    (or nzz).  By setting these parameters accurately, performance can be
681    increased by more than a factor of 50.
682 
683    Input Parameters:
684 .  comm - MPI communicator, set to MPI_COMM_SELF
685 .  bs - size of block
686 .  m - number of rows
687 .  n - number of columns
688 .  nz - number of block nonzeros per block row (same for all rows)
689 .  nzz - number of block nonzeros per block row or PETSC_NULL
690          (possibly different for each row)
691 
692    Output Parameter:
693 .  A - the matrix
694 
695    Notes:
696    The BAIJ format, is fully compatible with standard Fortran 77
697    storage.  That is, the stored row and column indices can begin at
698    either one (as in Fortran) or zero.  See the users manual for details.
699 
700    Specify the preallocated storage with either nz or nnz (not both).
701    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
702    allocation.  For additional details, see the users manual chapter on
703    matrices and the file $(PETSC_DIR)/Performance.
704 
705 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
706 @*/
707 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
708 {
709   Mat         B;
710   Mat_SeqBAIJ *b;
711   int         i,len,ierr, flg,mbs = m/bs;
712 
713   if (mbs*bs != m)
714     SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize");
715 
716   *A      = 0;
717   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
718   PLogObjectCreate(B);
719   B->data             = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
720   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
721   switch (bs) {
722     case 1:
723       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
724       break;
725     case 2:
726       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
727       break;
728     case 3:
729       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
730       break;
731     case 4:
732       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
733       break;
734     case 5:
735       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
736       break;
737   }
738   B->destroy          = MatDestroy_SeqBAIJ;
739   B->view             = MatView_SeqBAIJ;
740   B->factor           = 0;
741   B->lupivotthreshold = 1.0;
742   b->row              = 0;
743   b->col              = 0;
744   b->reallocs         = 0;
745 
746   b->m       = m;
747   b->mbs     = mbs;
748   b->n       = n;
749   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
750   if (nnz == PETSC_NULL) {
751     if (nz == PETSC_DEFAULT) nz = 5;
752     else if (nz <= 0)        nz = 1;
753     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
754     nz = nz*mbs;
755   }
756   else {
757     nz = 0;
758     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
759   }
760 
761   /* allocate the matrix space */
762   len   = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int);
763   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
764   PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar));
765   b->j  = (int *) (b->a + nz*bs*bs);
766   PetscMemzero(b->j,nz*sizeof(int));
767   b->i  = b->j + nz;
768   b->singlemalloc = 1;
769 
770   b->i[0] = 0;
771   for (i=1; i<mbs+1; i++) {
772     b->i[i] = b->i[i-1] + b->imax[i-1];
773   }
774 
775   /* b->ilen will count nonzeros in each block row so far. */
776   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
777   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
778   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
779 
780   b->bs               = bs;
781   b->mbs              = mbs;
782   b->nz               = 0;
783   b->maxnz            = nz;
784   b->sorted           = 0;
785   b->roworiented      = 1;
786   b->nonew            = 0;
787   b->diag             = 0;
788   b->solve_work       = 0;
789   b->mult_work        = 0;
790   b->spptr            = 0;
791 
792   *A = B;
793   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
794   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
795   return 0;
796 }
797 
798 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
799 {
800   Mat         C;
801   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
802   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz;
803 
804   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
805 
806   *B = 0;
807   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
808   PLogObjectCreate(C);
809   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
810   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
811   C->destroy    = MatDestroy_SeqBAIJ;
812   C->view       = MatView_SeqBAIJ;
813   C->factor     = A->factor;
814   c->row        = 0;
815   c->col        = 0;
816   C->assembled  = PETSC_TRUE;
817 
818   c->m          = a->m;
819   c->n          = a->n;
820   c->bs         = a->bs;
821   c->mbs        = a->mbs;
822   c->nbs        = a->nbs;
823 
824   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
825   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
826   for ( i=0; i<mbs; i++ ) {
827     c->imax[i] = a->imax[i];
828     c->ilen[i] = a->ilen[i];
829   }
830 
831   /* allocate the matrix space */
832   c->singlemalloc = 1;
833   len   = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int));
834   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
835   c->j  = (int *) (c->a + nz*bs*bs);
836   c->i  = c->j + nz;
837   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
838   if (mbs > 0) {
839     PetscMemcpy(c->j,a->j,nz*sizeof(int));
840     if (cpvalues == COPY_VALUES) {
841       PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar));
842     }
843   }
844 
845   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
846   c->sorted      = a->sorted;
847   c->roworiented = a->roworiented;
848   c->nonew       = a->nonew;
849 
850   if (a->diag) {
851     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
852     PLogObjectMemory(C,(mbs+1)*sizeof(int));
853     for ( i=0; i<mbs; i++ ) {
854       c->diag[i] = a->diag[i];
855     }
856   }
857   else c->diag          = 0;
858   c->nz                 = a->nz;
859   c->maxnz              = a->maxnz;
860   c->solve_work         = 0;
861   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
862   c->mult_work          = 0;
863   *B = C;
864   return 0;
865 }
866 
867 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
868 {
869   Mat_SeqBAIJ  *a;
870   Mat          B;
871   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
872   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
873   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
874   int          *masked, nmask,tmp,ishift,bs2;
875   Scalar       *aa;
876   MPI_Comm     comm = ((PetscObject) viewer)->comm;
877 
878   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
879   bs2  = bs*bs;
880 
881   MPI_Comm_size(comm,&size);
882   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
883   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
884   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
885   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
886   M = header[1]; N = header[2]; nz = header[3];
887 
888   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
889 
890   /*
891      This code adds extra rows to make sure the number of rows is
892     divisible by the blocksize
893   */
894   mbs        = M/bs;
895   extra_rows = bs - M + bs*(mbs);
896   if (extra_rows == bs) extra_rows = 0;
897   else                  mbs++;
898   if (extra_rows) {
899     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
900   }
901 
902   /* read in row lengths */
903   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
904   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
905   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
906 
907   /* read in column indices */
908   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
909   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
910   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
911 
912   /* loop over row lengths determining block row lengths */
913   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
914   PetscMemzero(browlengths,mbs*sizeof(int));
915   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
916   PetscMemzero(mask,mbs*sizeof(int));
917   masked = mask + mbs;
918   rowcount = 0; nzcount = 0;
919   for ( i=0; i<mbs; i++ ) {
920     nmask = 0;
921     for ( j=0; j<bs; j++ ) {
922       kmax = rowlengths[rowcount];
923       for ( k=0; k<kmax; k++ ) {
924         tmp = jj[nzcount++]/bs;
925         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
926       }
927       rowcount++;
928     }
929     browlengths[i] += nmask;
930     /* zero out the mask elements we set */
931     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
932   }
933 
934   /* create our matrix */
935   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
936          CHKERRQ(ierr);
937   B = *A;
938   a = (Mat_SeqBAIJ *) B->data;
939 
940   /* set matrix "i" values */
941   a->i[0] = 0;
942   for ( i=1; i<= mbs; i++ ) {
943     a->i[i]      = a->i[i-1] + browlengths[i-1];
944     a->ilen[i-1] = browlengths[i-1];
945   }
946   a->indexshift = 0;
947   a->nz         = 0;
948   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
949 
950   /* read in nonzero values */
951   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
952   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
953   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
954 
955   /* set "a" and "j" values into matrix */
956   nzcount = 0; jcount = 0;
957   for ( i=0; i<mbs; i++ ) {
958     nzcountb = nzcount;
959     nmask    = 0;
960     for ( j=0; j<bs; j++ ) {
961       kmax = rowlengths[i*bs+j];
962       for ( k=0; k<kmax; k++ ) {
963         tmp = jj[nzcount++]/bs;
964 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
965       }
966       rowcount++;
967     }
968     /* sort the masked values */
969     PetscSortInt(nmask,masked);
970 
971     /* set "j" values into matrix */
972     maskcount = 1;
973     for ( j=0; j<nmask; j++ ) {
974       a->j[jcount++]  = masked[j];
975       mask[masked[j]] = maskcount++;
976     }
977     /* set "a" values into matrix */
978     ishift = bs2*a->i[i];
979     for ( j=0; j<bs; j++ ) {
980       kmax = rowlengths[i*bs+j];
981       for ( k=0; k<kmax; k++ ) {
982         tmp    = jj[nzcountb]/bs ;
983         block  = mask[tmp] - 1;
984         point  = jj[nzcountb] - bs*tmp;
985         idx    = ishift + bs2*block + j + bs*point;
986         a->a[idx] = aa[nzcountb++];
987       }
988     }
989     /* zero out the mask elements we set */
990     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
991   }
992   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
993 
994   PetscFree(rowlengths);
995   PetscFree(browlengths);
996   PetscFree(aa);
997   PetscFree(jj);
998   PetscFree(mask);
999 
1000   B->assembled = PETSC_TRUE;
1001 
1002   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1003   if (flg) {
1004     Viewer tviewer;
1005     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1006     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
1007     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1008     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1009   }
1010   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1011   if (flg) {
1012     Viewer tviewer;
1013     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1014     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
1015     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1016     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1017   }
1018   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1019   if (flg) {
1020     Viewer tviewer;
1021     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1022     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1023     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1024   }
1025   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1026   if (flg) {
1027     Viewer tviewer;
1028     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1029     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
1030     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1031     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1032   }
1033   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1034   if (flg) {
1035     Viewer tviewer;
1036     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
1037     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
1038     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
1039     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1040   }
1041   return 0;
1042 }
1043 
1044 
1045 
1046