xref: /petsc/src/mat/impls/baij/seq/baij.c (revision deebb3c3fd3213c602f3e5660fb3343d8a7e852b)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.23 1996/03/31 16:51:11 bsmith Exp curfman $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = a->indexshift;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45     CHKERRQ(ierr);
46     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
47     PetscFree(ia); PetscFree(ja);
48   } else {
49     if (ishift == oshift) {
50       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51     }
52     else if (ishift == -1) {
53       /* temporarily subtract 1 from i and j indices */
54       int nz = a->i[a->n] - 1;
55       for ( i=0; i<nz; i++ ) a->j[i]--;
56       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58       for ( i=0; i<nz; i++ ) a->j[i]++;
59       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60     } else {
61       /* temporarily add 1 to i and j indices */
62       int nz = a->i[a->n] - 1;
63       for ( i=0; i<nz; i++ ) a->j[i]++;
64       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66       for ( i=0; i<nz; i++ ) a->j[i]--;
67       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68     }
69   }
70   return 0;
71 }
72 
73 /*
74      Adds diagonal pointers to sparse matrix structure.
75 */
76 
77 int MatMarkDiag_SeqBAIJ(Mat A)
78 {
79   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80   int         i,j, *diag, m = a->mbs;
81 
82   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83   PLogObjectMemory(A,(m+1)*sizeof(int));
84   for ( i=0; i<m; i++ ) {
85     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86       if (a->j[j] == i) {
87         diag[i] = j;
88         break;
89       }
90     }
91   }
92   a->diag = diag;
93   return 0;
94 }
95 
96 #include "draw.h"
97 #include "pinclude/pviewer.h"
98 #include "sys.h"
99 
100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
101 {
102   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
103   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l;
104   Scalar      *aa;
105 
106   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
107   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
108   col_lens[0] = MAT_COOKIE;
109   col_lens[1] = a->m;
110   col_lens[2] = a->n;
111   col_lens[3] = a->nz*bs*bs;
112 
113   /* store lengths of each row and write (including header) to file */
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118     }
119   }
120   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
121   PetscFree(col_lens);
122 
123   /* store column indices (zero start index) */
124   jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj);
125   count = 0;
126   for ( i=0; i<a->mbs; i++ ) {
127     for ( j=0; j<bs; j++ ) {
128       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129         for ( l=0; l<bs; l++ ) {
130           jj[count++] = bs*a->j[k] + l;
131         }
132       }
133     }
134   }
135   ierr = PetscBinaryWrite(fd,jj,bs*bs*a->nz,BINARY_INT,0); CHKERRQ(ierr);
136   PetscFree(jj);
137 
138   /* store nonzero values */
139   aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa);
140   count = 0;
141   for ( i=0; i<a->mbs; i++ ) {
142     for ( j=0; j<bs; j++ ) {
143       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144         for ( l=0; l<bs; l++ ) {
145           aa[count++] = a->a[bs*bs*k + l*bs + j];
146         }
147       }
148     }
149   }
150   ierr = PetscBinaryWrite(fd,aa,bs*bs*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
151   PetscFree(aa);
152   return 0;
153 }
154 
155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
156 {
157   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
158   int         ierr, i,j,format,bs = a->bs,k,l;
159   FILE        *fd;
160   char        *outputname;
161 
162   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
163   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
164   ierr = ViewerGetFormat(viewer,&format);
165   if (format == ASCII_FORMAT_INFO) {
166     /* no need to print additional information */ ;
167   }
168   else if (format == ASCII_FORMAT_MATLAB) {
169     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
170   }
171   else {
172     for ( i=0; i<a->mbs; i++ ) {
173       for ( j=0; j<bs; j++ ) {
174         fprintf(fd,"row %d:",i*bs+j);
175         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
176           for ( l=0; l<bs; l++ ) {
177 #if defined(PETSC_COMPLEX)
178           if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) {
179             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
180               real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j]));
181           }
182           else {
183             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j]));
184           }
185 #else
186             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]);
187 #endif
188           }
189         }
190         fprintf(fd,"\n");
191       }
192     }
193   }
194   fflush(fd);
195   return 0;
196 }
197 
198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
199 {
200   Mat         A = (Mat) obj;
201   ViewerType  vtype;
202   int         ierr;
203 
204   if (!viewer) {
205     viewer = STDOUT_VIEWER_SELF;
206   }
207 
208   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
209   if (vtype == MATLAB_VIEWER) {
210     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
211   }
212   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
213     return MatView_SeqBAIJ_ASCII(A,viewer);
214   }
215   else if (vtype == BINARY_FILE_VIEWER) {
216     return MatView_SeqBAIJ_Binary(A,viewer);
217   }
218   else if (vtype == DRAW_VIEWER) {
219     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
220   }
221   return 0;
222 }
223 
224 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
225 {
226   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
227   *m = a->m; *n = a->n;
228   return 0;
229 }
230 
231 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
232 {
233   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
234   *m = 0; *n = a->m;
235   return 0;
236 }
237 
238 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
239 {
240   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
241   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i;
242   Scalar      *aa,*v_i,*aa_i;
243 
244   bs = a->bs;
245   ai = a->i;
246   aj = a->j;
247   aa = a->a;
248 
249   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
250 
251   bn  = row/bs;   /* Block number */
252   bp  = row % bs; /* Block Position */
253   M   = ai[bn+1] - ai[bn];
254   *nz = bs*M;
255 
256   if (v) {
257     *v = 0;
258     if (*nz) {
259       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
260       for ( i=0; i<M; i++ ) { /* for each block in the block row */
261         v_i  = *v + i*bs;
262         aa_i = aa + bs*bs*(ai[bn] + i);
263         for ( j=bp,k=0; j<bs*bs; j+=bs,k++ ) {v_i[k] = aa_i[j];}
264       }
265     }
266   }
267 
268   if (idx) {
269     *idx = 0;
270     if (*nz) {
271       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
272       for ( i=0; i<M; i++ ) { /* for each block in the block row */
273         idx_i = *idx + i*bs;
274         itmp  = bs*aj[ai[bn] + i];
275         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
276       }
277     }
278   }
279   return 0;
280 }
281 
282 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
283 {
284   if (idx) {if (*idx) PetscFree(*idx);}
285   if (v)   {if (*v)   PetscFree(*v);}
286   return 0;
287 }
288 
289 static int MatZeroEntries_SeqBAIJ(Mat A)
290 {
291   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
292   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
293   return 0;
294 }
295 
296 int MatDestroy_SeqBAIJ(PetscObject obj)
297 {
298   Mat         A  = (Mat) obj;
299   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
300 
301 #if defined(PETSC_LOG)
302   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
303 #endif
304   PetscFree(a->a);
305   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
306   if (a->diag) PetscFree(a->diag);
307   if (a->ilen) PetscFree(a->ilen);
308   if (a->imax) PetscFree(a->imax);
309   if (a->solve_work) PetscFree(a->solve_work);
310   if (a->mult_work) PetscFree(a->mult_work);
311   PetscFree(a);
312   PLogObjectDestroy(A);
313   PetscHeaderDestroy(A);
314   return 0;
315 }
316 
317 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
318 {
319   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
320   if      (op == ROW_ORIENTED)              a->roworiented = 1;
321   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
322   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
323   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
324   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
325   else if (op == ROWS_SORTED ||
326            op == SYMMETRIC_MATRIX ||
327            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
328            op == YES_NEW_DIAGONALS)
329     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
330   else if (op == NO_NEW_DIAGONALS)
331     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
332   else
333     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
334   return 0;
335 }
336 
337 
338 /* -------------------------------------------------------*/
339 /* Should check that shapes of vectors and matrices match */
340 /* -------------------------------------------------------*/
341 #include "pinclude/plapack.h"
342 
343 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
344 {
345   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
346   Scalar          *xg,*yg;
347   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
348   register Scalar x1,x2,x3,x4,x5;
349   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
350   int             bs = a->bs,j,n,bs2 = bs*bs;
351 
352   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
353   PetscMemzero(y,m*sizeof(Scalar));
354   x     = x;
355   idx   = a->j;
356   v     = a->a;
357   ii    = a->i;
358 
359   switch (bs) {
360     case 1:
361      for ( i=0; i<m; i++ ) {
362        n    = ii[1] - ii[0]; ii++;
363        sum  = 0.0;
364        while (n--) sum += *v++ * x[*idx++];
365        y[i] = sum;
366       }
367       break;
368     case 2:
369       for ( i=0; i<mbs; i++ ) {
370         n  = ii[1] - ii[0]; ii++;
371         sum1 = 0.0; sum2 = 0.0;
372         for ( j=0; j<n; j++ ) {
373           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
374           sum1 += v[0]*x1 + v[2]*x2;
375           sum2 += v[1]*x1 + v[3]*x2;
376           v += 4;
377         }
378         y[0] += sum1; y[1] += sum2;
379         y += 2;
380       }
381       break;
382     case 3:
383       for ( i=0; i<mbs; i++ ) {
384         n  = ii[1] - ii[0]; ii++;
385         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
386         for ( j=0; j<n; j++ ) {
387           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
388           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
389           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
390           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
391           v += 9;
392         }
393         y[0] += sum1; y[1] += sum2; y[2] += sum3;
394         y += 3;
395       }
396       break;
397     case 4:
398       for ( i=0; i<mbs; i++ ) {
399         n  = ii[1] - ii[0]; ii++;
400         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
401         for ( j=0; j<n; j++ ) {
402           xb = x + 4*(*idx++);
403           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
404           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
405           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
406           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
407           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
408           v += 16;
409         }
410         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
411         y += 4;
412       }
413       break;
414     case 5:
415       for ( i=0; i<mbs; i++ ) {
416         n  = ii[1] - ii[0]; ii++;
417         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
418         for ( j=0; j<n; j++ ) {
419           xb = x + 5*(*idx++);
420           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
421           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
422           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
423           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
424           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
425           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
426           v += 25;
427         }
428         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
429         y += 5;
430       }
431       break;
432       /* block sizes larger then 5 by 5 are handled by BLAS */
433     default: {
434       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
435       if (!a->mult_work) {
436         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
437         CHKPTRQ(a->mult_work);
438       }
439       work = a->mult_work;
440       for ( i=0; i<mbs; i++ ) {
441         n     = ii[1] - ii[0]; ii++;
442         ncols = n*bs;
443         workt = work;
444         for ( j=0; j<n; j++ ) {
445           xb = x + bs*(*idx++);
446           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
447           workt += bs;
448         }
449         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
450         v += n*bs2;
451         y += bs;
452       }
453     }
454   }
455   PLogFlops(2*bs2*a->nz - m);
456   return 0;
457 }
458 
459 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
460 {
461   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
462   if (nz)  *nz  = a->bs*a->bs*a->nz;
463   if (nza) *nza = a->maxnz;
464   if (mem) *mem = (int)A->mem;
465   return 0;
466 }
467 
468 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
469 {
470   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
471 
472   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
473 
474   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
475   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
476       (a->nz != b->nz)||(a->indexshift != b->indexshift)) {
477     *flg = PETSC_FALSE; return 0;
478   }
479 
480   /* if the a->i are the same */
481   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
482     *flg = PETSC_FALSE; return 0;
483   }
484 
485   /* if a->j are the same */
486   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
487     *flg = PETSC_FALSE; return 0;
488   }
489 
490   /* if a->a are the same */
491   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
492     *flg = PETSC_FALSE; return 0;
493   }
494   *flg = PETSC_TRUE;
495   return 0;
496 
497 }
498 
499 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
500 {
501   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
502   int         i,j,k,n,row,bs,*ai,*aj,ambs;
503   Scalar      *x, zero = 0.0,*aa,*aa_j;
504 
505   bs  = a->bs;
506   aa   = a->a;
507   ai   = a->i;
508   aj   = a->j;
509   ambs = a->mbs;
510 
511   VecSet(&zero,v);
512   VecGetArray(v,&x); VecGetLocalSize(v,&n);
513   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
514   for ( i=0; i<ambs; i++ ) {
515     for ( j=ai[i]; j<ai[i+1]; j++ ) {
516       if (aj[j] == i) {
517         row  = i*bs;
518         aa_j = aa+j*bs*bs;
519         for (k=0; k<bs*bs; k+=(bs+1),row++) x[row] = aa_j[k];
520         break;
521       }
522     }
523   }
524   return 0;
525 }
526 
527 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
528 {
529   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
530   Scalar      *l,*r,x,*v,*aa,*li,*ri;
531   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs;
532 
533   ai  = a->i;
534   aj  = a->j;
535   aa  = a->a;
536   m   = a->m;
537   n   = a->n;
538   bs  = a->bs;
539   mbs = a->mbs;
540 
541   if (ll) {
542     VecGetArray(ll,&l); VecGetSize(ll,&lm);
543     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
544     for ( i=0; i<mbs; i++ ) { /* for each block row */
545       M  = ai[i+1] - ai[i];
546       li = l + i*bs;
547       v  = aa + bs*bs*ai[i];
548       for ( j=0; j<M; j++ ) { /* for each block */
549         for ( k=0; k<bs*bs; k++ ) {
550           (*v++) *= li[k%bs];
551         }
552       }
553     }
554   }
555 
556   if (rr) {
557     VecGetArray(rr,&r); VecGetSize(rr,&rn);
558     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
559     for ( i=0; i<mbs; i++ ) { /* for each block row */
560       M  = ai[i+1] - ai[i];
561       v  = aa + bs*bs*ai[i];
562       for ( j=0; j<M; j++ ) { /* for each block */
563         ri = r + bs*aj[ai[i]+j];
564         for ( k=0; k<bs; k++ ) {
565           x = ri[k];
566           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
567         }
568       }
569     }
570   }
571   return 0;
572 }
573 
574 
575 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
576 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
577 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
578 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
579 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
580 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
581 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
582 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
583 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
584 
585 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
586 {
587   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
588   Scalar      *v = a->a;
589   double      sum = 0.0;
590   int         i;
591 
592   if (type == NORM_FROBENIUS) {
593     for (i=0; i<a->nz; i++ ) {
594 #if defined(PETSC_COMPLEX)
595       sum += real(conj(*v)*(*v)); v++;
596 #else
597       sum += (*v)*(*v); v++;
598 #endif
599     }
600     *norm = sqrt(sum);
601   }
602   else {
603     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
604   }
605   return 0;
606 }
607 
608 /*
609      note: This can only work for identity for row and col. It would
610    be good to check this and otherwise generate an error.
611 */
612 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
613 {
614   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
615   Mat         outA;
616   int         ierr;
617 
618   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
619 
620   outA          = inA;
621   inA->factor   = FACTOR_LU;
622   a->row        = row;
623   a->col        = col;
624 
625   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
626 
627   if (!a->diag) {
628     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
629   }
630   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
631   return 0;
632 }
633 
634 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
635 {
636   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
637   int         one = 1, totalnz = a->bs*a->bs*a->nz;
638   BLscal_( &totalnz, alpha, a->a, &one );
639   PLogFlops(totalnz);
640   return 0;
641 }
642 
643 int MatPrintHelp_SeqBAIJ(Mat A)
644 {
645   static int called = 0;
646 
647   if (called) return 0; else called = 1;
648   return 0;
649 }
650 /* -------------------------------------------------------------------*/
651 static struct _MatOps MatOps = {0,
652        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
653        MatMult_SeqBAIJ,0,
654        0,0,
655        MatSolve_SeqBAIJ,0,
656        0,0,
657        MatLUFactor_SeqBAIJ,0,
658        0,
659        0,
660        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
661        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
662        0,0,
663        0,
664        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
665        MatGetReordering_SeqBAIJ,
666        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
667        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
668        MatILUFactorSymbolic_SeqBAIJ,0,
669        0,0,/*  MatConvert_SeqBAIJ  */ 0,
670        0,0,
671        MatConvertSameType_SeqBAIJ,0,0,
672        MatILUFactor_SeqBAIJ,0,0,
673        0,0,
674        0,0,
675        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
676        0};
677 
678 /*@C
679    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
680    (the default parallel PETSc format).  For good matrix assembly performance
681    the user should preallocate the matrix storage by setting the parameter nz
682    (or nzz).  By setting these parameters accurately, performance can be
683    increased by more than a factor of 50.
684 
685    Input Parameters:
686 .  comm - MPI communicator, set to MPI_COMM_SELF
687 .  bs - size of block
688 .  m - number of rows
689 .  n - number of columns
690 .  nz - number of block nonzeros per block row (same for all rows)
691 .  nzz - number of block nonzeros per block row or PETSC_NULL
692          (possibly different for each row)
693 
694    Output Parameter:
695 .  A - the matrix
696 
697    Notes:
698    The BAIJ format, is fully compatible with standard Fortran 77
699    storage.  That is, the stored row and column indices can begin at
700    either one (as in Fortran) or zero.  See the users manual for details.
701 
702    Specify the preallocated storage with either nz or nnz (not both).
703    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
704    allocation.  For additional details, see the users manual chapter on
705    matrices and the file $(PETSC_DIR)/Performance.
706 
707 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
708 @*/
709 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
710 {
711   Mat         B;
712   Mat_SeqBAIJ *b;
713   int         i,len,ierr, flg,mbs = m/bs;
714 
715   if (mbs*bs != m)
716     SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize");
717 
718   *A      = 0;
719   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
720   PLogObjectCreate(B);
721   B->data             = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
722   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
723   switch (bs) {
724     case 1:
725       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
726       break;
727     case 2:
728       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
729       break;
730     case 3:
731       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
732       break;
733     case 4:
734       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
735       break;
736     case 5:
737       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
738       break;
739   }
740   B->destroy          = MatDestroy_SeqBAIJ;
741   B->view             = MatView_SeqBAIJ;
742   B->factor           = 0;
743   B->lupivotthreshold = 1.0;
744   b->row              = 0;
745   b->col              = 0;
746   b->reallocs         = 0;
747 
748   b->m       = m;
749   b->mbs     = mbs;
750   b->n       = n;
751   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
752   if (nnz == PETSC_NULL) {
753     if (nz == PETSC_DEFAULT) nz = 5;
754     else if (nz <= 0)        nz = 1;
755     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
756     nz = nz*mbs;
757   }
758   else {
759     nz = 0;
760     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
761   }
762 
763   /* allocate the matrix space */
764   len   = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int);
765   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
766   PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar));
767   b->j  = (int *) (b->a + nz*bs*bs);
768   PetscMemzero(b->j,nz*sizeof(int));
769   b->i  = b->j + nz;
770   b->singlemalloc = 1;
771 
772   b->i[0] = 0;
773   for (i=1; i<mbs+1; i++) {
774     b->i[i] = b->i[i-1] + b->imax[i-1];
775   }
776 
777   /* b->ilen will count nonzeros in each block row so far. */
778   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
779   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
780   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
781 
782   b->bs               = bs;
783   b->mbs              = mbs;
784   b->nz               = 0;
785   b->maxnz            = nz;
786   b->sorted           = 0;
787   b->roworiented      = 1;
788   b->nonew            = 0;
789   b->diag             = 0;
790   b->solve_work       = 0;
791   b->mult_work        = 0;
792   b->spptr            = 0;
793 
794   *A = B;
795   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
796   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
797   return 0;
798 }
799 
800 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
801 {
802   Mat         C;
803   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
804   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz;
805 
806   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
807 
808   *B = 0;
809   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
810   PLogObjectCreate(C);
811   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
812   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
813   C->destroy    = MatDestroy_SeqBAIJ;
814   C->view       = MatView_SeqBAIJ;
815   C->factor     = A->factor;
816   c->row        = 0;
817   c->col        = 0;
818   C->assembled  = PETSC_TRUE;
819 
820   c->m          = a->m;
821   c->n          = a->n;
822   c->bs         = a->bs;
823   c->mbs        = a->mbs;
824   c->nbs        = a->nbs;
825 
826   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
827   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
828   for ( i=0; i<mbs; i++ ) {
829     c->imax[i] = a->imax[i];
830     c->ilen[i] = a->ilen[i];
831   }
832 
833   /* allocate the matrix space */
834   c->singlemalloc = 1;
835   len   = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int));
836   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
837   c->j  = (int *) (c->a + nz*bs*bs);
838   c->i  = c->j + nz;
839   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
840   if (mbs > 0) {
841     PetscMemcpy(c->j,a->j,nz*sizeof(int));
842     if (cpvalues == COPY_VALUES) {
843       PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar));
844     }
845   }
846 
847   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
848   c->sorted      = a->sorted;
849   c->roworiented = a->roworiented;
850   c->nonew       = a->nonew;
851 
852   if (a->diag) {
853     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
854     PLogObjectMemory(C,(mbs+1)*sizeof(int));
855     for ( i=0; i<mbs; i++ ) {
856       c->diag[i] = a->diag[i];
857     }
858   }
859   else c->diag          = 0;
860   c->nz                 = a->nz;
861   c->maxnz              = a->maxnz;
862   c->solve_work         = 0;
863   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
864   c->mult_work          = 0;
865   *B = C;
866   return 0;
867 }
868 
869 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
870 {
871   Mat_SeqBAIJ  *a;
872   Mat          B;
873   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
874   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
875   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
876   int          *masked, nmask,tmp,ishift,bs2;
877   Scalar       *aa;
878   MPI_Comm     comm = ((PetscObject) viewer)->comm;
879 
880   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
881   bs2  = bs*bs;
882 
883   MPI_Comm_size(comm,&size);
884   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
885   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
886   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
887   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
888   M = header[1]; N = header[2]; nz = header[3];
889 
890   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
891 
892   /*
893      This code adds extra rows to make sure the number of rows is
894     divisible by the blocksize
895   */
896   mbs        = M/bs;
897   extra_rows = bs - M + bs*(mbs);
898   if (extra_rows == bs) extra_rows = 0;
899   else                  mbs++;
900   if (extra_rows) {
901     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
902   }
903 
904   /* read in row lengths */
905   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
906   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
907   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
908 
909   /* read in column indices */
910   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
911   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
912   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
913 
914   /* loop over row lengths determining block row lengths */
915   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
916   PetscMemzero(browlengths,mbs*sizeof(int));
917   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
918   PetscMemzero(mask,mbs*sizeof(int));
919   masked = mask + mbs;
920   rowcount = 0; nzcount = 0;
921   for ( i=0; i<mbs; i++ ) {
922     nmask = 0;
923     for ( j=0; j<bs; j++ ) {
924       kmax = rowlengths[rowcount];
925       for ( k=0; k<kmax; k++ ) {
926         tmp = jj[nzcount++]/bs;
927         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
928       }
929       rowcount++;
930     }
931     browlengths[i] += nmask;
932     /* zero out the mask elements we set */
933     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
934   }
935 
936   /* create our matrix */
937   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
938          CHKERRQ(ierr);
939   B = *A;
940   a = (Mat_SeqBAIJ *) B->data;
941 
942   /* set matrix "i" values */
943   a->i[0] = 0;
944   for ( i=1; i<= mbs; i++ ) {
945     a->i[i]      = a->i[i-1] + browlengths[i-1];
946     a->ilen[i-1] = browlengths[i-1];
947   }
948   a->indexshift = 0;
949   a->nz         = 0;
950   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
951 
952   /* read in nonzero values */
953   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
954   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
955   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
956 
957   /* set "a" and "j" values into matrix */
958   nzcount = 0; jcount = 0;
959   for ( i=0; i<mbs; i++ ) {
960     nzcountb = nzcount;
961     nmask    = 0;
962     for ( j=0; j<bs; j++ ) {
963       kmax = rowlengths[i*bs+j];
964       for ( k=0; k<kmax; k++ ) {
965         tmp = jj[nzcount++]/bs;
966 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
967       }
968       rowcount++;
969     }
970     /* sort the masked values */
971     PetscSortInt(nmask,masked);
972 
973     /* set "j" values into matrix */
974     maskcount = 1;
975     for ( j=0; j<nmask; j++ ) {
976       a->j[jcount++]  = masked[j];
977       mask[masked[j]] = maskcount++;
978     }
979     /* set "a" values into matrix */
980     ishift = bs2*a->i[i];
981     for ( j=0; j<bs; j++ ) {
982       kmax = rowlengths[i*bs+j];
983       for ( k=0; k<kmax; k++ ) {
984         tmp    = jj[nzcountb]/bs ;
985         block  = mask[tmp] - 1;
986         point  = jj[nzcountb] - bs*tmp;
987         idx    = ishift + bs2*block + j + bs*point;
988         a->a[idx] = aa[nzcountb++];
989       }
990     }
991     /* zero out the mask elements we set */
992     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
993   }
994   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
995 
996   PetscFree(rowlengths);
997   PetscFree(browlengths);
998   PetscFree(aa);
999   PetscFree(jj);
1000   PetscFree(mask);
1001 
1002   B->assembled = PETSC_TRUE;
1003 
1004   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1005   if (flg) {
1006     Viewer tviewer;
1007     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1008     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
1009     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1010     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1011   }
1012   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1013   if (flg) {
1014     Viewer tviewer;
1015     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1016     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
1017     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1018     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1019   }
1020   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1021   if (flg) {
1022     Viewer tviewer;
1023     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1024     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1025     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1026   }
1027   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1028   if (flg) {
1029     Viewer tviewer;
1030     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1031     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
1032     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1033     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1034   }
1035   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1036   if (flg) {
1037     Viewer tviewer;
1038     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
1039     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
1040     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
1041     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1042   }
1043   return 0;
1044 }
1045 
1046 
1047 
1048