xref: /petsc/src/mat/impls/baij/seq/baij.c (revision b024c5bee1b6062e54466c6fa6e2157ffc486290)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.17 1996/03/25 22:43:35 balay Exp balay $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = a->indexshift;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45     CHKERRQ(ierr);
46     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
47     PetscFree(ia); PetscFree(ja);
48   } else {
49     if (ishift == oshift) {
50       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51     }
52     else if (ishift == -1) {
53       /* temporarily subtract 1 from i and j indices */
54       int nz = a->i[a->n] - 1;
55       for ( i=0; i<nz; i++ ) a->j[i]--;
56       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58       for ( i=0; i<nz; i++ ) a->j[i]++;
59       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60     } else {
61       /* temporarily add 1 to i and j indices */
62       int nz = a->i[a->n] - 1;
63       for ( i=0; i<nz; i++ ) a->j[i]++;
64       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66       for ( i=0; i<nz; i++ ) a->j[i]--;
67       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68     }
69   }
70   return 0;
71 }
72 
73 /*
74      Adds diagonal pointers to sparse matrix structure.
75 */
76 
77 int MatMarkDiag_SeqBAIJ(Mat A)
78 {
79   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80   int         i,j, *diag, m = a->mbs;
81 
82   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83   PLogObjectMemory(A,(m+1)*sizeof(int));
84   for ( i=0; i<m; i++ ) {
85     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86       if (a->j[j] == i) {
87         diag[i] = j;
88         break;
89       }
90     }
91   }
92   a->diag = diag;
93   return 0;
94 }
95 
96 #include "draw.h"
97 #include "pinclude/pviewer.h"
98 #include "sys.h"
99 
100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
101 {
102   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
103   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l;
104   Scalar      *aa;
105 
106   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
107   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
108   col_lens[0] = MAT_COOKIE;
109   col_lens[1] = a->m;
110   col_lens[2] = a->n;
111   col_lens[3] = a->nz*bs*bs;
112 
113   /* store lengths of each row and write (including header) to file */
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118     }
119   }
120   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
121   PetscFree(col_lens);
122 
123   /* store column indices (zero start index) */
124   jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj);
125   count = 0;
126   for ( i=0; i<a->mbs; i++ ) {
127     for ( j=0; j<bs; j++ ) {
128       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129         for ( l=0; l<bs; l++ ) {
130           jj[count++] = bs*a->j[k] + l;
131         }
132       }
133     }
134   }
135   ierr = PetscBinaryWrite(fd,jj,bs*bs*a->nz,BINARY_INT,0); CHKERRQ(ierr);
136   PetscFree(jj);
137 
138   /* store nonzero values */
139   aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa);
140   count = 0;
141   for ( i=0; i<a->mbs; i++ ) {
142     for ( j=0; j<bs; j++ ) {
143       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144         for ( l=0; l<bs; l++ ) {
145           aa[count++] = a->a[bs*bs*k + l*bs + j];
146         }
147       }
148     }
149   }
150   ierr = PetscBinaryWrite(fd,aa,bs*bs*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
151   PetscFree(aa);
152   return 0;
153 }
154 
155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
156 {
157   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
158   int         ierr, i,j,format,bs = a->bs,k,l;
159   FILE        *fd;
160   char        *outputname;
161 
162   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
163   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
164   ierr = ViewerGetFormat(viewer,&format);
165   if (format == ASCII_FORMAT_INFO) {
166     /* no need to print additional information */ ;
167   }
168   else if (format == ASCII_FORMAT_MATLAB) {
169     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
170   }
171   else {
172     for ( i=0; i<a->mbs; i++ ) {
173       for ( j=0; j<bs; j++ ) {
174         fprintf(fd,"row %d:",i*bs+j);
175         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
176           for ( l=0; l<bs; l++ ) {
177 #if defined(PETSC_COMPLEX)
178           if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) {
179             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
180               real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j]));
181           }
182           else {
183             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j]));
184           }
185 #else
186             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]);
187 #endif
188           }
189         }
190         fprintf(fd,"\n");
191       }
192     }
193   }
194   fflush(fd);
195   return 0;
196 }
197 
198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
199 {
200   Mat         A = (Mat) obj;
201   ViewerType  vtype;
202   int         ierr;
203 
204   if (!viewer) {
205     viewer = STDOUT_VIEWER_SELF;
206   }
207 
208   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
209   if (vtype == MATLAB_VIEWER) {
210     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
211   }
212   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
213     return MatView_SeqBAIJ_ASCII(A,viewer);
214   }
215   else if (vtype == BINARY_FILE_VIEWER) {
216     return MatView_SeqBAIJ_Binary(A,viewer);
217   }
218   else if (vtype == DRAW_VIEWER) {
219     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
220   }
221   return 0;
222 }
223 
224 
225 static int MatZeroEntries_SeqBAIJ(Mat A)
226 {
227   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
228   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
229   return 0;
230 }
231 
232 int MatDestroy_SeqBAIJ(PetscObject obj)
233 {
234   Mat        A  = (Mat) obj;
235   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
236 
237 #if defined(PETSC_LOG)
238   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
239 #endif
240   PetscFree(a->a);
241   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
242   if (a->diag) PetscFree(a->diag);
243   if (a->ilen) PetscFree(a->ilen);
244   if (a->imax) PetscFree(a->imax);
245   if (a->solve_work) PetscFree(a->solve_work);
246   if (a->mult_work) PetscFree(a->mult_work);
247   PetscFree(a);
248   PLogObjectDestroy(A);
249   PetscHeaderDestroy(A);
250   return 0;
251 }
252 
253 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
254 {
255   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
256   if      (op == ROW_ORIENTED)              a->roworiented = 1;
257   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
258   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
259   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
260   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
261   else if (op == ROWS_SORTED ||
262            op == SYMMETRIC_MATRIX ||
263            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
264            op == YES_NEW_DIAGONALS)
265     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
266   else if (op == NO_NEW_DIAGONALS)
267     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
268   else
269     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
270   return 0;
271 }
272 
273 
274 /* -------------------------------------------------------*/
275 /* Should check that shapes of vectors and matrices match */
276 /* -------------------------------------------------------*/
277 #include "pinclude/plapack.h"
278 
279 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
280 {
281   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
282   Scalar          *xg,*yg;
283   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
284   register Scalar x1,x2,x3,x4,x5;
285   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
286   int             bs = a->bs,j,n,bs2 = bs*bs;
287 
288   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
289   PetscMemzero(y,m*sizeof(Scalar));
290   x     = x;
291   idx   = a->j;
292   v     = a->a;
293   ii    = a->i;
294 
295   switch (bs) {
296     case 1:
297      for ( i=0; i<m; i++ ) {
298        n    = ii[1] - ii[0]; ii++;
299        sum  = 0.0;
300        while (n--) sum += *v++ * x[*idx++];
301        y[i] = sum;
302       }
303       break;
304     case 2:
305       for ( i=0; i<mbs; i++ ) {
306         n  = ii[1] - ii[0]; ii++;
307         sum1 = 0.0; sum2 = 0.0;
308         for ( j=0; j<n; j++ ) {
309           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
310           sum1 += v[0]*x1 + v[2]*x2;
311           sum2 += v[1]*x1 + v[3]*x2;
312           v += 4;
313         }
314         y[0] += sum1; y[1] += sum2;
315         y += 2;
316       }
317       break;
318     case 3:
319       for ( i=0; i<mbs; i++ ) {
320         n  = ii[1] - ii[0]; ii++;
321         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
322         for ( j=0; j<n; j++ ) {
323           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
324           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
325           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
326           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
327           v += 9;
328         }
329         y[0] += sum1; y[1] += sum2; y[2] += sum3;
330         y += 3;
331       }
332       break;
333     case 4:
334       for ( i=0; i<mbs; i++ ) {
335         n  = ii[1] - ii[0]; ii++;
336         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
337         for ( j=0; j<n; j++ ) {
338           xb = x + 4*(*idx++);
339           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
340           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
341           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
342           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
343           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
344           v += 16;
345         }
346         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
347         y += 4;
348       }
349       break;
350     case 5:
351       for ( i=0; i<mbs; i++ ) {
352         n  = ii[1] - ii[0]; ii++;
353         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
354         for ( j=0; j<n; j++ ) {
355           xb = x + 5*(*idx++);
356           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
357           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
358           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
359           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
360           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
361           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
362           v += 25;
363         }
364         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
365         y += 5;
366       }
367       break;
368       /* block sizes larger then 5 by 5 are handled by BLAS */
369     default: {
370       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
371       if (!a->mult_work) {
372         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
373         CHKPTRQ(a->mult_work);
374       }
375       work = a->mult_work;
376       for ( i=0; i<mbs; i++ ) {
377         n     = ii[1] - ii[0]; ii++;
378         ncols = n*bs;
379         workt = work;
380         for ( j=0; j<n; j++ ) {
381           xb = x + bs*(*idx++);
382           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
383           workt += bs;
384         }
385         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
386         v += n*bs2;
387         y += bs;
388       }
389     }
390   }
391   PLogFlops(2*bs2*a->nz - m);
392   return 0;
393 }
394 
395 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
396 {
397   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
398   if (nz)  *nz  = a->bs*a->bs*a->nz;
399   if (nza) *nza = a->maxnz;
400   if (mem) *mem = (int)A->mem;
401   return 0;
402 }
403 
404 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
405 {
406   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
407 
408   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
409 
410   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
411   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
412       (a->nz != b->nz)||(a->indexshift != b->indexshift)) {
413     *flg = PETSC_FALSE; return 0;
414   }
415 
416   /* if the a->i are the same */
417   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
418     *flg = PETSC_FALSE; return 0;
419   }
420 
421   /* if a->j are the same */
422   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
423     *flg = PETSC_FALSE; return 0;
424   }
425 
426   /* if a->a are the same */
427   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
428     *flg = PETSC_FALSE; return 0;
429   }
430   *flg = PETSC_TRUE;
431   return 0;
432 
433 }
434 
435 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
436 {
437   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
438   int        i,j,k,n,row,bs,*ai,*aj,ambs;
439   Scalar     *x, zero = 0.0,*aa,*aa_j;
440 
441   bs  = a->bs;
442   aa   = a->a;
443   ai   = a->i;
444   aj   = a->j;
445   ambs = a->mbs;
446 
447   VecSet(&zero,v);
448   VecGetArray(v,&x); VecGetLocalSize(v,&n);
449   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqAIJ:Nonconforming matrix and vector");
450   for ( i=0; i<ambs; i++ ) {
451     for ( j=ai[i]; j<ai[i+1]; j++ ) {
452       if (aj[j] == i) {
453         row  = i*bs;
454         aa_j = aa+j*bs*bs;
455         for (k=0; k<bs*bs; k+=(bs+1),row++) x[row] = aa_j[k];
456         break;
457       }
458     }
459   }
460   return 0;
461 }
462 
463 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
464 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
465 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
466 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
467 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
468 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
469 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
470 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
471 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
472 
473 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
474 {
475   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
476   *m = a->m; *n = a->n;
477   return 0;
478 }
479 
480 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
481 {
482   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
483   *m = 0; *n = a->m;
484   return 0;
485 }
486 
487 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
488 {
489   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
490   Scalar      *v = a->a;
491   double      sum = 0.0;
492   int         i;
493 
494   if (type == NORM_FROBENIUS) {
495     for (i=0; i<a->nz; i++ ) {
496 #if defined(PETSC_COMPLEX)
497       sum += real(conj(*v)*(*v)); v++;
498 #else
499       sum += (*v)*(*v); v++;
500 #endif
501     }
502     *norm = sqrt(sum);
503   }
504   else {
505     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
506   }
507   return 0;
508 }
509 
510 /*
511      note: This can only work for identity for row and col. It would
512    be good to check this and otherwise generate an error.
513 */
514 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
515 {
516   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
517   Mat         outA;
518   int         ierr;
519 
520   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
521 
522   outA          = inA;
523   inA->factor   = FACTOR_LU;
524   a->row        = row;
525   a->col        = col;
526 
527   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
528 
529   if (!a->diag) {
530     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
531   }
532   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
533   return 0;
534 }
535 
536 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
537 {
538   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
539   int         one = 1, totalnz = a->bs*a->bs*a->nz;
540   BLscal_( &totalnz, alpha, a->a, &one );
541   PLogFlops(totalnz);
542   return 0;
543 }
544 
545 int MatPrintHelp_SeqBAIJ(Mat A)
546 {
547   static int called = 0;
548 
549   if (called) return 0; else called = 1;
550   return 0;
551 }
552 /* -------------------------------------------------------------------*/
553 static struct _MatOps MatOps = {0,
554        0,0,
555        MatMult_SeqBAIJ,0,
556        0,0,
557        MatSolve_SeqBAIJ,0,
558        0,0,
559        MatLUFactor_SeqBAIJ,0,
560        0,
561        0,
562        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
563        MatGetDiagonal_SeqBAIJ,0,MatNorm_SeqBAIJ,
564        0,0,
565        0,
566        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
567        MatGetReordering_SeqBAIJ,
568        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
569        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
570        MatILUFactorSymbolic_SeqBAIJ,0,
571        0,0,/*  MatConvert_SeqBAIJ  */ 0,
572        0,0,
573        MatConvertSameType_SeqBAIJ,0,0,
574        MatILUFactor_SeqBAIJ,0,0,
575        0,0,
576        0,0,
577        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
578        0};
579 
580 /*@C
581    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
582    (the default parallel PETSc format).  For good matrix assembly performance
583    the user should preallocate the matrix storage by setting the parameter nz
584    (or nzz).  By setting these parameters accurately, performance can be
585    increased by more than a factor of 50.
586 
587    Input Parameters:
588 .  comm - MPI communicator, set to MPI_COMM_SELF
589 .  bs - size of block
590 .  m - number of rows
591 .  n - number of columns
592 .  nz - number of block nonzeros per block row (same for all rows)
593 .  nzz - number of block nonzeros per block row or PETSC_NULL
594          (possibly different for each row)
595 
596    Output Parameter:
597 .  A - the matrix
598 
599    Notes:
600    The BAIJ format, is fully compatible with standard Fortran 77
601    storage.  That is, the stored row and column indices can begin at
602    either one (as in Fortran) or zero.  See the users manual for details.
603 
604    Specify the preallocated storage with either nz or nnz (not both).
605    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
606    allocation.  For additional details, see the users manual chapter on
607    matrices and the file $(PETSC_DIR)/Performance.
608 
609 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
610 @*/
611 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
612 {
613   Mat         B;
614   Mat_SeqBAIJ *b;
615   int         i,len,ierr, flg,mbs = m/bs;
616 
617   if (mbs*bs != m)
618     SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize");
619 
620   *A      = 0;
621   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
622   PLogObjectCreate(B);
623   B->data             = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
624   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
625   switch (bs) {
626     case 1:
627       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
628       break;
629     case 2:
630       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
631       break;
632     case 3:
633       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
634       break;
635     case 4:
636       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
637       break;
638     case 5:
639       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
640       break;
641   }
642   B->destroy          = MatDestroy_SeqBAIJ;
643   B->view             = MatView_SeqBAIJ;
644   B->factor           = 0;
645   B->lupivotthreshold = 1.0;
646   b->row              = 0;
647   b->col              = 0;
648   b->reallocs         = 0;
649 
650   b->m       = m;
651   b->mbs     = mbs;
652   b->n       = n;
653   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
654   if (nnz == PETSC_NULL) {
655     if (nz == PETSC_DEFAULT) nz = 5;
656     else if (nz <= 0)        nz = 1;
657     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
658     nz = nz*mbs;
659   }
660   else {
661     nz = 0;
662     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
663   }
664 
665   /* allocate the matrix space */
666   len   = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int);
667   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
668   PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar));
669   b->j  = (int *) (b->a + nz*bs*bs);
670   PetscMemzero(b->j,nz*sizeof(int));
671   b->i  = b->j + nz;
672   b->singlemalloc = 1;
673 
674   b->i[0] = 0;
675   for (i=1; i<mbs+1; i++) {
676     b->i[i] = b->i[i-1] + b->imax[i-1];
677   }
678 
679   /* b->ilen will count nonzeros in each block row so far. */
680   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
681   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
682   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
683 
684   b->bs               = bs;
685   b->mbs              = mbs;
686   b->nz               = 0;
687   b->maxnz            = nz;
688   b->sorted           = 0;
689   b->roworiented      = 1;
690   b->nonew            = 0;
691   b->diag             = 0;
692   b->solve_work       = 0;
693   b->mult_work        = 0;
694   b->spptr            = 0;
695 
696   *A = B;
697   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
698   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
699   return 0;
700 }
701 
702 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
703 {
704   Mat         C;
705   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
706   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz;
707 
708   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
709 
710   *B = 0;
711   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
712   PLogObjectCreate(C);
713   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
714   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
715   C->destroy    = MatDestroy_SeqBAIJ;
716   C->view       = MatView_SeqBAIJ;
717   C->factor     = A->factor;
718   c->row        = 0;
719   c->col        = 0;
720   C->assembled  = PETSC_TRUE;
721 
722   c->m          = a->m;
723   c->n          = a->n;
724   c->bs         = a->bs;
725   c->mbs        = a->mbs;
726   c->nbs        = a->nbs;
727 
728   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
729   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
730   for ( i=0; i<mbs; i++ ) {
731     c->imax[i] = a->imax[i];
732     c->ilen[i] = a->ilen[i];
733   }
734 
735   /* allocate the matrix space */
736   c->singlemalloc = 1;
737   len   = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int));
738   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
739   c->j  = (int *) (c->a + nz*bs*bs);
740   c->i  = c->j + nz;
741   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
742   if (mbs > 0) {
743     PetscMemcpy(c->j,a->j,nz*sizeof(int));
744     if (cpvalues == COPY_VALUES) {
745       PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar));
746     }
747   }
748 
749   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
750   c->sorted      = a->sorted;
751   c->roworiented = a->roworiented;
752   c->nonew       = a->nonew;
753 
754   if (a->diag) {
755     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
756     PLogObjectMemory(C,(mbs+1)*sizeof(int));
757     for ( i=0; i<mbs; i++ ) {
758       c->diag[i] = a->diag[i];
759     }
760   }
761   else c->diag          = 0;
762   c->nz                 = a->nz;
763   c->maxnz              = a->maxnz;
764   c->solve_work         = 0;
765   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
766   c->mult_work          = 0;
767   *B = C;
768   return 0;
769 }
770 
771 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
772 {
773   Mat_SeqBAIJ  *a;
774   Mat          B;
775   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
776   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
777   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
778   int          *masked, nmask,tmp,ishift,bs2;
779   Scalar       *aa;
780   MPI_Comm     comm = ((PetscObject) viewer)->comm;
781 
782   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
783   bs2  = bs*bs;
784 
785   MPI_Comm_size(comm,&size);
786   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
787   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
788   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
789   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
790   M = header[1]; N = header[2]; nz = header[3];
791 
792   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
793 
794   /*
795      This code adds extra rows to make sure the number of rows is
796     divisible by the blocksize
797   */
798   mbs        = M/bs;
799   extra_rows = bs - M + bs*(mbs);
800   if (extra_rows == bs) extra_rows = 0;
801   else                  mbs++;
802   if (extra_rows) {
803     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
804   }
805 
806   /* read in row lengths */
807   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
808   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
809   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
810 
811   /* read in column indices */
812   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
813   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
814   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
815 
816   /* loop over row lengths determining block row lengths */
817   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
818   PetscMemzero(browlengths,mbs*sizeof(int));
819   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
820   PetscMemzero(mask,mbs*sizeof(int));
821   masked = mask + mbs;
822   rowcount = 0; nzcount = 0;
823   for ( i=0; i<mbs; i++ ) {
824     nmask = 0;
825     for ( j=0; j<bs; j++ ) {
826       kmax = rowlengths[rowcount];
827       for ( k=0; k<kmax; k++ ) {
828         tmp = jj[nzcount++]/bs;
829         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
830       }
831       rowcount++;
832     }
833     browlengths[i] += nmask;
834     /* zero out the mask elements we set */
835     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
836   }
837 
838   /* create our matrix */
839   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
840          CHKERRQ(ierr);
841   B = *A;
842   a = (Mat_SeqBAIJ *) B->data;
843 
844   /* set matrix "i" values */
845   a->i[0] = 0;
846   for ( i=1; i<= mbs; i++ ) {
847     a->i[i]      = a->i[i-1] + browlengths[i-1];
848     a->ilen[i-1] = browlengths[i-1];
849   }
850   a->nz = 0;
851   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
852 
853   /* read in nonzero values */
854   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
855   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
856   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
857 
858   /* set "a" and "j" values into matrix */
859   nzcount = 0; jcount = 0;
860   for ( i=0; i<mbs; i++ ) {
861     nzcountb = nzcount;
862     nmask    = 0;
863     for ( j=0; j<bs; j++ ) {
864       kmax = rowlengths[i*bs+j];
865       for ( k=0; k<kmax; k++ ) {
866         tmp = jj[nzcount++]/bs;
867 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
868       }
869       rowcount++;
870     }
871     /* sort the masked values */
872     PetscSortInt(nmask,masked);
873 
874     /* set "j" values into matrix */
875     maskcount = 1;
876     for ( j=0; j<nmask; j++ ) {
877       a->j[jcount++]  = masked[j];
878       mask[masked[j]] = maskcount++;
879     }
880     /* set "a" values into matrix */
881     ishift = bs2*a->i[i];
882     for ( j=0; j<bs; j++ ) {
883       kmax = rowlengths[i*bs+j];
884       for ( k=0; k<kmax; k++ ) {
885         tmp    = jj[nzcountb]/bs ;
886         block  = mask[tmp] - 1;
887         point  = jj[nzcountb] - bs*tmp;
888         idx    = ishift + bs2*block + j + bs*point;
889         a->a[idx] = aa[nzcountb++];
890       }
891     }
892     /* zero out the mask elements we set */
893     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
894   }
895   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
896 
897   PetscFree(rowlengths);
898   PetscFree(browlengths);
899   PetscFree(aa);
900   PetscFree(jj);
901   PetscFree(mask);
902 
903   B->assembled = PETSC_TRUE;
904 
905   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
906   if (flg) {
907     Viewer tviewer;
908     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
909     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
910     ierr = MatView(B,tviewer); CHKERRQ(ierr);
911     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
912   }
913   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
914   if (flg) {
915     Viewer tviewer;
916     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
917     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
918     ierr = MatView(B,tviewer); CHKERRQ(ierr);
919     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
920   }
921   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
922   if (flg) {
923     Viewer tviewer;
924     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
925     ierr = MatView(B,tviewer); CHKERRQ(ierr);
926     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
927   }
928   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
929   if (flg) {
930     Viewer tviewer;
931     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
932     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
933     ierr = MatView(B,tviewer); CHKERRQ(ierr);
934     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
935   }
936   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
937   if (flg) {
938     Viewer tviewer;
939     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
940     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
941     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
942     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
943   }
944   return 0;
945 }
946 
947 
948 
949