xref: /petsc/src/mat/impls/baij/seq/baij.c (revision f8af6adcbfb1e5c7871c80e05df796d258e7eb76)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.18 1996/03/25 23:39:02 balay Exp bsmith $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = a->indexshift;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45     CHKERRQ(ierr);
46     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
47     PetscFree(ia); PetscFree(ja);
48   } else {
49     if (ishift == oshift) {
50       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51     }
52     else if (ishift == -1) {
53       /* temporarily subtract 1 from i and j indices */
54       int nz = a->i[a->n] - 1;
55       for ( i=0; i<nz; i++ ) a->j[i]--;
56       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58       for ( i=0; i<nz; i++ ) a->j[i]++;
59       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60     } else {
61       /* temporarily add 1 to i and j indices */
62       int nz = a->i[a->n] - 1;
63       for ( i=0; i<nz; i++ ) a->j[i]++;
64       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66       for ( i=0; i<nz; i++ ) a->j[i]--;
67       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68     }
69   }
70   return 0;
71 }
72 
73 /*
74      Adds diagonal pointers to sparse matrix structure.
75 */
76 
77 int MatMarkDiag_SeqBAIJ(Mat A)
78 {
79   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80   int         i,j, *diag, m = a->mbs;
81 
82   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83   PLogObjectMemory(A,(m+1)*sizeof(int));
84   for ( i=0; i<m; i++ ) {
85     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86       if (a->j[j] == i) {
87         diag[i] = j;
88         break;
89       }
90     }
91   }
92   a->diag = diag;
93   return 0;
94 }
95 
96 #include "draw.h"
97 #include "pinclude/pviewer.h"
98 #include "sys.h"
99 
100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
101 {
102   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
103   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l;
104   Scalar      *aa;
105 
106   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
107   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
108   col_lens[0] = MAT_COOKIE;
109   col_lens[1] = a->m;
110   col_lens[2] = a->n;
111   col_lens[3] = a->nz*bs*bs;
112 
113   /* store lengths of each row and write (including header) to file */
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118     }
119   }
120   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
121   PetscFree(col_lens);
122 
123   /* store column indices (zero start index) */
124   jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj);
125   count = 0;
126   for ( i=0; i<a->mbs; i++ ) {
127     for ( j=0; j<bs; j++ ) {
128       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129         for ( l=0; l<bs; l++ ) {
130           jj[count++] = bs*a->j[k] + l;
131         }
132       }
133     }
134   }
135   ierr = PetscBinaryWrite(fd,jj,bs*bs*a->nz,BINARY_INT,0); CHKERRQ(ierr);
136   PetscFree(jj);
137 
138   /* store nonzero values */
139   aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa);
140   count = 0;
141   for ( i=0; i<a->mbs; i++ ) {
142     for ( j=0; j<bs; j++ ) {
143       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144         for ( l=0; l<bs; l++ ) {
145           aa[count++] = a->a[bs*bs*k + l*bs + j];
146         }
147       }
148     }
149   }
150   ierr = PetscBinaryWrite(fd,aa,bs*bs*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
151   PetscFree(aa);
152   return 0;
153 }
154 
155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
156 {
157   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
158   int         ierr, i,j,format,bs = a->bs,k,l;
159   FILE        *fd;
160   char        *outputname;
161 
162   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
163   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
164   ierr = ViewerGetFormat(viewer,&format);
165   if (format == ASCII_FORMAT_INFO) {
166     /* no need to print additional information */ ;
167   }
168   else if (format == ASCII_FORMAT_MATLAB) {
169     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
170   }
171   else {
172     for ( i=0; i<a->mbs; i++ ) {
173       for ( j=0; j<bs; j++ ) {
174         fprintf(fd,"row %d:",i*bs+j);
175         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
176           for ( l=0; l<bs; l++ ) {
177 #if defined(PETSC_COMPLEX)
178           if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) {
179             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
180               real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j]));
181           }
182           else {
183             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j]));
184           }
185 #else
186             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]);
187 #endif
188           }
189         }
190         fprintf(fd,"\n");
191       }
192     }
193   }
194   fflush(fd);
195   return 0;
196 }
197 
198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
199 {
200   Mat         A = (Mat) obj;
201   ViewerType  vtype;
202   int         ierr;
203 
204   if (!viewer) {
205     viewer = STDOUT_VIEWER_SELF;
206   }
207 
208   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
209   if (vtype == MATLAB_VIEWER) {
210     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
211   }
212   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
213     return MatView_SeqBAIJ_ASCII(A,viewer);
214   }
215   else if (vtype == BINARY_FILE_VIEWER) {
216     return MatView_SeqBAIJ_Binary(A,viewer);
217   }
218   else if (vtype == DRAW_VIEWER) {
219     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
220   }
221   return 0;
222 }
223 
224 
225 static int MatZeroEntries_SeqBAIJ(Mat A)
226 {
227   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
228   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
229   return 0;
230 }
231 
232 int MatDestroy_SeqBAIJ(PetscObject obj)
233 {
234   Mat        A  = (Mat) obj;
235   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
236 
237 #if defined(PETSC_LOG)
238   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
239 #endif
240   PetscFree(a->a);
241   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
242   if (a->diag) PetscFree(a->diag);
243   if (a->ilen) PetscFree(a->ilen);
244   if (a->imax) PetscFree(a->imax);
245   if (a->solve_work) PetscFree(a->solve_work);
246   if (a->mult_work) PetscFree(a->mult_work);
247   PetscFree(a);
248   return 0;
249 }
250 
251 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
252 {
253   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
254   if      (op == ROW_ORIENTED)              a->roworiented = 1;
255   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
256   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
257   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
258   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
259   else if (op == ROWS_SORTED ||
260            op == SYMMETRIC_MATRIX ||
261            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
262            op == YES_NEW_DIAGONALS)
263     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
264   else if (op == NO_NEW_DIAGONALS)
265     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
266   else
267     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
268   return 0;
269 }
270 
271 
272 /* -------------------------------------------------------*/
273 /* Should check that shapes of vectors and matrices match */
274 /* -------------------------------------------------------*/
275 #include "pinclude/plapack.h"
276 
277 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
278 {
279   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
280   Scalar          *xg,*yg;
281   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
282   register Scalar x1,x2,x3,x4,x5;
283   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
284   int             bs = a->bs,j,n,bs2 = bs*bs;
285 
286   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
287   PetscMemzero(y,m*sizeof(Scalar));
288   x     = x;
289   idx   = a->j;
290   v     = a->a;
291   ii    = a->i;
292 
293   switch (bs) {
294     case 1:
295      for ( i=0; i<m; i++ ) {
296        n    = ii[1] - ii[0]; ii++;
297        sum  = 0.0;
298        while (n--) sum += *v++ * x[*idx++];
299        y[i] = sum;
300       }
301       break;
302     case 2:
303       for ( i=0; i<mbs; i++ ) {
304         n  = ii[1] - ii[0]; ii++;
305         sum1 = 0.0; sum2 = 0.0;
306         for ( j=0; j<n; j++ ) {
307           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
308           sum1 += v[0]*x1 + v[2]*x2;
309           sum2 += v[1]*x1 + v[3]*x2;
310           v += 4;
311         }
312         y[0] += sum1; y[1] += sum2;
313         y += 2;
314       }
315       break;
316     case 3:
317       for ( i=0; i<mbs; i++ ) {
318         n  = ii[1] - ii[0]; ii++;
319         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
320         for ( j=0; j<n; j++ ) {
321           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
322           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
323           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
324           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
325           v += 9;
326         }
327         y[0] += sum1; y[1] += sum2; y[2] += sum3;
328         y += 3;
329       }
330       break;
331     case 4:
332       for ( i=0; i<mbs; i++ ) {
333         n  = ii[1] - ii[0]; ii++;
334         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
335         for ( j=0; j<n; j++ ) {
336           xb = x + 4*(*idx++);
337           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
338           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
339           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
340           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
341           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
342           v += 16;
343         }
344         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
345         y += 4;
346       }
347       break;
348     case 5:
349       for ( i=0; i<mbs; i++ ) {
350         n  = ii[1] - ii[0]; ii++;
351         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
352         for ( j=0; j<n; j++ ) {
353           xb = x + 5*(*idx++);
354           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
355           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
356           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
357           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
358           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
359           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
360           v += 25;
361         }
362         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
363         y += 5;
364       }
365       break;
366       /* block sizes larger then 5 by 5 are handled by BLAS */
367     default: {
368       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
369       if (!a->mult_work) {
370         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
371         CHKPTRQ(a->mult_work);
372       }
373       work = a->mult_work;
374       for ( i=0; i<mbs; i++ ) {
375         n     = ii[1] - ii[0]; ii++;
376         ncols = n*bs;
377         workt = work;
378         for ( j=0; j<n; j++ ) {
379           xb = x + bs*(*idx++);
380           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
381           workt += bs;
382         }
383         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
384         v += n*bs2;
385         y += bs;
386       }
387     }
388   }
389   PLogFlops(2*bs2*a->nz - m);
390   return 0;
391 }
392 
393 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
394 {
395   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
396   if (nz)  *nz  = a->bs*a->bs*a->nz;
397   if (nza) *nza = a->maxnz;
398   if (mem) *mem = (int)A->mem;
399   return 0;
400 }
401 
402 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
403 {
404   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
405 
406   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
407 
408   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
409   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
410       (a->nz != b->nz)||(a->indexshift != b->indexshift)) {
411     *flg = PETSC_FALSE; return 0;
412   }
413 
414   /* if the a->i are the same */
415   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
416     *flg = PETSC_FALSE; return 0;
417   }
418 
419   /* if a->j are the same */
420   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
421     *flg = PETSC_FALSE; return 0;
422   }
423 
424   /* if a->a are the same */
425   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
426     *flg = PETSC_FALSE; return 0;
427   }
428   *flg = PETSC_TRUE;
429   return 0;
430 
431 }
432 
433 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
434 {
435   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
436   int        i,j,k,n,row,bs,*ai,*aj,ambs;
437   Scalar     *x, zero = 0.0,*aa,*aa_j;
438 
439   bs  = a->bs;
440   aa   = a->a;
441   ai   = a->i;
442   aj   = a->j;
443   ambs = a->mbs;
444 
445   VecSet(&zero,v);
446   VecGetArray(v,&x); VecGetLocalSize(v,&n);
447   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqAIJ:Nonconforming matrix and vector");
448   for ( i=0; i<ambs; i++ ) {
449     for ( j=ai[i]; j<ai[i+1]; j++ ) {
450       if (aj[j] == i) {
451         row  = i*bs;
452         aa_j = aa+j*bs*bs;
453         for (k=0; k<bs*bs; k+=(bs+1),row++) x[row] = aa_j[k];
454         break;
455       }
456     }
457   }
458   return 0;
459 }
460 
461 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
462 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
463 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
464 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
465 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
466 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
467 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
468 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
469 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
470 
471 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
472 {
473   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
474   *m = a->m; *n = a->n;
475   return 0;
476 }
477 
478 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
479 {
480   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
481   *m = 0; *n = a->m;
482   return 0;
483 }
484 
485 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
486 {
487   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
488   Scalar      *v = a->a;
489   double      sum = 0.0;
490   int         i;
491 
492   if (type == NORM_FROBENIUS) {
493     for (i=0; i<a->nz; i++ ) {
494 #if defined(PETSC_COMPLEX)
495       sum += real(conj(*v)*(*v)); v++;
496 #else
497       sum += (*v)*(*v); v++;
498 #endif
499     }
500     *norm = sqrt(sum);
501   }
502   else {
503     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
504   }
505   return 0;
506 }
507 
508 /*
509      note: This can only work for identity for row and col. It would
510    be good to check this and otherwise generate an error.
511 */
512 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
513 {
514   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
515   Mat         outA;
516   int         ierr;
517 
518   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
519 
520   outA          = inA;
521   inA->factor   = FACTOR_LU;
522   a->row        = row;
523   a->col        = col;
524 
525   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
526 
527   if (!a->diag) {
528     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
529   }
530   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
531   return 0;
532 }
533 
534 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
535 {
536   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
537   int         one = 1, totalnz = a->bs*a->bs*a->nz;
538   BLscal_( &totalnz, alpha, a->a, &one );
539   PLogFlops(totalnz);
540   return 0;
541 }
542 
543 int MatPrintHelp_SeqBAIJ(Mat A)
544 {
545   static int called = 0;
546 
547   if (called) return 0; else called = 1;
548   return 0;
549 }
550 /* -------------------------------------------------------------------*/
551 static struct _MatOps MatOps = {0,
552        0,0,
553        MatMult_SeqBAIJ,0,
554        0,0,
555        MatSolve_SeqBAIJ,0,
556        0,0,
557        MatLUFactor_SeqBAIJ,0,
558        0,
559        0,
560        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
561        MatGetDiagonal_SeqBAIJ,0,MatNorm_SeqBAIJ,
562        0,0,
563        0,
564        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
565        MatGetReordering_SeqBAIJ,
566        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
567        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
568        MatILUFactorSymbolic_SeqBAIJ,0,
569        0,0,/*  MatConvert_SeqBAIJ  */ 0,
570        0,0,
571        MatConvertSameType_SeqBAIJ,0,0,
572        MatILUFactor_SeqBAIJ,0,0,
573        0,0,
574        0,0,
575        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
576        0};
577 
578 /*@C
579    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
580    (the default parallel PETSc format).  For good matrix assembly performance
581    the user should preallocate the matrix storage by setting the parameter nz
582    (or nzz).  By setting these parameters accurately, performance can be
583    increased by more than a factor of 50.
584 
585    Input Parameters:
586 .  comm - MPI communicator, set to MPI_COMM_SELF
587 .  bs - size of block
588 .  m - number of rows
589 .  n - number of columns
590 .  nz - number of block nonzeros per block row (same for all rows)
591 .  nzz - number of block nonzeros per block row or PETSC_NULL
592          (possibly different for each row)
593 
594    Output Parameter:
595 .  A - the matrix
596 
597    Notes:
598    The BAIJ format, is fully compatible with standard Fortran 77
599    storage.  That is, the stored row and column indices can begin at
600    either one (as in Fortran) or zero.  See the users manual for details.
601 
602    Specify the preallocated storage with either nz or nnz (not both).
603    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
604    allocation.  For additional details, see the users manual chapter on
605    matrices and the file $(PETSC_DIR)/Performance.
606 
607 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
608 @*/
609 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
610 {
611   Mat         B;
612   Mat_SeqBAIJ *b;
613   int         i,len,ierr, flg,mbs = m/bs;
614 
615   if (mbs*bs != m)
616     SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize");
617 
618   *A      = 0;
619   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
620   PLogObjectCreate(B);
621   B->data             = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
622   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
623   switch (bs) {
624     case 1:
625       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
626       break;
627     case 2:
628       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
629       break;
630     case 3:
631       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
632       break;
633     case 4:
634       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
635       break;
636     case 5:
637       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
638       break;
639   }
640   B->destroy          = MatDestroy_SeqBAIJ;
641   B->view             = MatView_SeqBAIJ;
642   B->factor           = 0;
643   B->lupivotthreshold = 1.0;
644   b->row              = 0;
645   b->col              = 0;
646   b->reallocs         = 0;
647 
648   b->m       = m;
649   b->mbs     = mbs;
650   b->n       = n;
651   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
652   if (nnz == PETSC_NULL) {
653     if (nz == PETSC_DEFAULT) nz = 5;
654     else if (nz <= 0)        nz = 1;
655     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
656     nz = nz*mbs;
657   }
658   else {
659     nz = 0;
660     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
661   }
662 
663   /* allocate the matrix space */
664   len   = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int);
665   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
666   PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar));
667   b->j  = (int *) (b->a + nz*bs*bs);
668   PetscMemzero(b->j,nz*sizeof(int));
669   b->i  = b->j + nz;
670   b->singlemalloc = 1;
671 
672   b->i[0] = 0;
673   for (i=1; i<mbs+1; i++) {
674     b->i[i] = b->i[i-1] + b->imax[i-1];
675   }
676 
677   /* b->ilen will count nonzeros in each block row so far. */
678   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
679   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
680   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
681 
682   b->bs               = bs;
683   b->mbs              = mbs;
684   b->nz               = 0;
685   b->maxnz            = nz;
686   b->sorted           = 0;
687   b->roworiented      = 1;
688   b->nonew            = 0;
689   b->diag             = 0;
690   b->solve_work       = 0;
691   b->mult_work        = 0;
692   b->spptr            = 0;
693 
694   *A = B;
695   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
696   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
697   return 0;
698 }
699 
700 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
701 {
702   Mat         C;
703   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
704   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz;
705 
706   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
707 
708   *B = 0;
709   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
710   PLogObjectCreate(C);
711   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
712   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
713   C->destroy    = MatDestroy_SeqBAIJ;
714   C->view       = MatView_SeqBAIJ;
715   C->factor     = A->factor;
716   c->row        = 0;
717   c->col        = 0;
718   C->assembled  = PETSC_TRUE;
719 
720   c->m          = a->m;
721   c->n          = a->n;
722   c->bs         = a->bs;
723   c->mbs        = a->mbs;
724   c->nbs        = a->nbs;
725 
726   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
727   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
728   for ( i=0; i<mbs; i++ ) {
729     c->imax[i] = a->imax[i];
730     c->ilen[i] = a->ilen[i];
731   }
732 
733   /* allocate the matrix space */
734   c->singlemalloc = 1;
735   len   = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int));
736   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
737   c->j  = (int *) (c->a + nz*bs*bs);
738   c->i  = c->j + nz;
739   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
740   if (mbs > 0) {
741     PetscMemcpy(c->j,a->j,nz*sizeof(int));
742     if (cpvalues == COPY_VALUES) {
743       PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar));
744     }
745   }
746 
747   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
748   c->sorted      = a->sorted;
749   c->roworiented = a->roworiented;
750   c->nonew       = a->nonew;
751 
752   if (a->diag) {
753     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
754     PLogObjectMemory(C,(mbs+1)*sizeof(int));
755     for ( i=0; i<mbs; i++ ) {
756       c->diag[i] = a->diag[i];
757     }
758   }
759   else c->diag          = 0;
760   c->nz                 = a->nz;
761   c->maxnz              = a->maxnz;
762   c->solve_work         = 0;
763   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
764   c->mult_work          = 0;
765   *B = C;
766   return 0;
767 }
768 
769 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
770 {
771   Mat_SeqBAIJ  *a;
772   Mat          B;
773   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
774   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
775   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
776   int          *masked, nmask,tmp,ishift,bs2;
777   Scalar       *aa;
778   MPI_Comm     comm = ((PetscObject) viewer)->comm;
779 
780   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
781   bs2  = bs*bs;
782 
783   MPI_Comm_size(comm,&size);
784   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
785   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
786   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
787   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
788   M = header[1]; N = header[2]; nz = header[3];
789 
790   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
791 
792   /*
793      This code adds extra rows to make sure the number of rows is
794     divisible by the blocksize
795   */
796   mbs        = M/bs;
797   extra_rows = bs - M + bs*(mbs);
798   if (extra_rows == bs) extra_rows = 0;
799   else                  mbs++;
800   if (extra_rows) {
801     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
802   }
803 
804   /* read in row lengths */
805   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
806   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
807   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
808 
809   /* read in column indices */
810   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
811   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
812   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
813 
814   /* loop over row lengths determining block row lengths */
815   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
816   PetscMemzero(browlengths,mbs*sizeof(int));
817   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
818   PetscMemzero(mask,mbs*sizeof(int));
819   masked = mask + mbs;
820   rowcount = 0; nzcount = 0;
821   for ( i=0; i<mbs; i++ ) {
822     nmask = 0;
823     for ( j=0; j<bs; j++ ) {
824       kmax = rowlengths[rowcount];
825       for ( k=0; k<kmax; k++ ) {
826         tmp = jj[nzcount++]/bs;
827         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
828       }
829       rowcount++;
830     }
831     browlengths[i] += nmask;
832     /* zero out the mask elements we set */
833     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
834   }
835 
836   /* create our matrix */
837   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
838          CHKERRQ(ierr);
839   B = *A;
840   a = (Mat_SeqBAIJ *) B->data;
841 
842   /* set matrix "i" values */
843   a->i[0] = 0;
844   for ( i=1; i<= mbs; i++ ) {
845     a->i[i]      = a->i[i-1] + browlengths[i-1];
846     a->ilen[i-1] = browlengths[i-1];
847   }
848   a->nz = 0;
849   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
850 
851   /* read in nonzero values */
852   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
853   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
854   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
855 
856   /* set "a" and "j" values into matrix */
857   nzcount = 0; jcount = 0;
858   for ( i=0; i<mbs; i++ ) {
859     nzcountb = nzcount;
860     nmask    = 0;
861     for ( j=0; j<bs; j++ ) {
862       kmax = rowlengths[i*bs+j];
863       for ( k=0; k<kmax; k++ ) {
864         tmp = jj[nzcount++]/bs;
865 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
866       }
867       rowcount++;
868     }
869     /* sort the masked values */
870     PetscSortInt(nmask,masked);
871 
872     /* set "j" values into matrix */
873     maskcount = 1;
874     for ( j=0; j<nmask; j++ ) {
875       a->j[jcount++]  = masked[j];
876       mask[masked[j]] = maskcount++;
877     }
878     /* set "a" values into matrix */
879     ishift = bs2*a->i[i];
880     for ( j=0; j<bs; j++ ) {
881       kmax = rowlengths[i*bs+j];
882       for ( k=0; k<kmax; k++ ) {
883         tmp    = jj[nzcountb]/bs ;
884         block  = mask[tmp] - 1;
885         point  = jj[nzcountb] - bs*tmp;
886         idx    = ishift + bs2*block + j + bs*point;
887         a->a[idx] = aa[nzcountb++];
888       }
889     }
890     /* zero out the mask elements we set */
891     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
892   }
893   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
894 
895   PetscFree(rowlengths);
896   PetscFree(browlengths);
897   PetscFree(aa);
898   PetscFree(jj);
899   PetscFree(mask);
900 
901   B->assembled = PETSC_TRUE;
902 
903   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
904   if (flg) {
905     Viewer tviewer;
906     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
907     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
908     ierr = MatView(B,tviewer); CHKERRQ(ierr);
909     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
910   }
911   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
912   if (flg) {
913     Viewer tviewer;
914     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
915     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
916     ierr = MatView(B,tviewer); CHKERRQ(ierr);
917     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
918   }
919   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
920   if (flg) {
921     Viewer tviewer;
922     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
923     ierr = MatView(B,tviewer); CHKERRQ(ierr);
924     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
925   }
926   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
927   if (flg) {
928     Viewer tviewer;
929     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
930     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
931     ierr = MatView(B,tviewer); CHKERRQ(ierr);
932     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
933   }
934   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
935   if (flg) {
936     Viewer tviewer;
937     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
938     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
939     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
940     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
941   }
942   return 0;
943 }
944 
945 
946 
947