xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 2a38ea04c8b142f2fa76fbc4a8a6f5177b55a72e)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.41 1996/04/30 19:14:56 balay Exp balay $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "src/vec/vecimpl.h"
12 #include "src/inline/spops.h"
13 #include "petsc.h"
14 
15 extern   int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = 0;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr);
45     ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
46     PetscFree(ia); PetscFree(ja);
47   } else {
48     if (ishift == oshift) {
49       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
50     }
51     else if (ishift == -1) {
52       /* temporarily subtract 1 from i and j indices */
53       int nz = a->i[n] - 1;
54       for ( i=0; i<nz; i++ ) a->j[i]--;
55       for ( i=0; i<n+1; i++ ) a->i[i]--;
56       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
57       for ( i=0; i<nz; i++ ) a->j[i]++;
58       for ( i=0; i<n+1; i++ ) a->i[i]++;
59     } else {
60       /* temporarily add 1 to i and j indices */
61       int nz = a->i[n] - 1;
62       for ( i=0; i<nz; i++ ) a->j[i]++;
63       for ( i=0; i<n+1; i++ ) a->i[i]++;
64       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
65       for ( i=0; i<nz; i++ ) a->j[i]--;
66       for ( i=0; i<n+1; i++ ) a->i[i]--;
67     }
68   }
69   return 0;
70 }
71 
72 /*
73      Adds diagonal pointers to sparse matrix structure.
74 */
75 
76 int MatMarkDiag_SeqBAIJ(Mat A)
77 {
78   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
79   int         i,j, *diag, m = a->mbs;
80 
81   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
82   PLogObjectMemory(A,(m+1)*sizeof(int));
83   for ( i=0; i<m; i++ ) {
84     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
85       if (a->j[j] == i) {
86         diag[i] = j;
87         break;
88       }
89     }
90   }
91   a->diag = diag;
92   return 0;
93 }
94 
95 #include "draw.h"
96 #include "pinclude/pviewer.h"
97 #include "sys.h"
98 
99 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
100 {
101   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
102   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
103   Scalar      *aa;
104 
105   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
106   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
107   col_lens[0] = MAT_COOKIE;
108   col_lens[1] = a->m;
109   col_lens[2] = a->n;
110   col_lens[3] = a->nz*bs2;
111 
112   /* store lengths of each row and write (including header) to file */
113   count = 0;
114   for ( i=0; i<a->mbs; i++ ) {
115     for ( j=0; j<bs; j++ ) {
116       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
117     }
118   }
119   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
120   PetscFree(col_lens);
121 
122   /* store column indices (zero start index) */
123   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
124   count = 0;
125   for ( i=0; i<a->mbs; i++ ) {
126     for ( j=0; j<bs; j++ ) {
127       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
128         for ( l=0; l<bs; l++ ) {
129           jj[count++] = bs*a->j[k] + l;
130         }
131       }
132     }
133   }
134   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
135   PetscFree(jj);
136 
137   /* store nonzero values */
138   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
139   count = 0;
140   for ( i=0; i<a->mbs; i++ ) {
141     for ( j=0; j<bs; j++ ) {
142       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
143         for ( l=0; l<bs; l++ ) {
144           aa[count++] = a->a[bs2*k + l*bs + j];
145         }
146       }
147     }
148   }
149   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
150   PetscFree(aa);
151   return 0;
152 }
153 
154 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
155 {
156   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
157   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
158   FILE        *fd;
159   char        *outputname;
160 
161   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
162   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
163   ierr = ViewerGetFormat(viewer,&format);
164   if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) {
165     fprintf(fd,"  block size is %d\n",bs);
166   }
167   else if (format == ASCII_FORMAT_MATLAB) {
168     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
169   }
170   else if (format == ASCII_FORMAT_COMMON) {
171     for ( i=0; i<a->mbs; i++ ) {
172       for ( j=0; j<bs; j++ ) {
173         fprintf(fd,"row %d:",i*bs+j);
174         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
175           for ( l=0; l<bs; l++ ) {
176 #if defined(PETSC_COMPLEX)
177           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
178             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
179               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
180           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
181             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
182 #else
183           if (a->a[bs2*k + l*bs + j] != 0.0)
184             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
185 #endif
186           }
187         }
188         fprintf(fd,"\n");
189       }
190     }
191   }
192   else {
193     for ( i=0; i<a->mbs; i++ ) {
194       for ( j=0; j<bs; j++ ) {
195         fprintf(fd,"row %d:",i*bs+j);
196         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
197           for ( l=0; l<bs; l++ ) {
198 #if defined(PETSC_COMPLEX)
199           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
200             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
201               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
202           }
203           else {
204             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
205           }
206 #else
207             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
208 #endif
209           }
210         }
211         fprintf(fd,"\n");
212       }
213     }
214   }
215   fflush(fd);
216   return 0;
217 }
218 
219 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
220 {
221   Mat         A = (Mat) obj;
222   ViewerType  vtype;
223   int         ierr;
224 
225   if (!viewer) {
226     viewer = STDOUT_VIEWER_SELF;
227   }
228 
229   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
230   if (vtype == MATLAB_VIEWER) {
231     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
232   }
233   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
234     return MatView_SeqBAIJ_ASCII(A,viewer);
235   }
236   else if (vtype == BINARY_FILE_VIEWER) {
237     return MatView_SeqBAIJ_Binary(A,viewer);
238   }
239   else if (vtype == DRAW_VIEWER) {
240     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
241   }
242   return 0;
243 }
244 
245 #define CHUNKSIZE  10
246 
247 /* This version has row oriented v  */
248 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
249 {
250   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
251   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
252   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
253   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
254   int          ridx,cidx,bs2=a->bs2;
255   Scalar      *ap,value,*aa=a->a,*bap;
256 
257   for ( k=0; k<m; k++ ) { /* loop over added rows */
258     row  = im[k]; brow = row/bs;
259     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
260     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
261     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
262     rmax = imax[brow]; nrow = ailen[brow];
263     low = 0;
264     for ( l=0; l<n; l++ ) { /* loop over added columns */
265       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
266       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
267       col = in[l]; bcol = col/bs;
268       ridx = row % bs; cidx = col % bs;
269       if (roworiented) {
270         value = *v++;
271       }
272       else {
273         value = v[k + l*m];
274       }
275       if (!sorted) low = 0; high = nrow;
276       while (high-low > 5) {
277         t = (low+high)/2;
278         if (rp[t] > bcol) high = t;
279         else             low  = t;
280       }
281       for ( i=low; i<high; i++ ) {
282         if (rp[i] > bcol) break;
283         if (rp[i] == bcol) {
284           bap  = ap +  bs2*i + bs*cidx + ridx;
285           if (is == ADD_VALUES) *bap += value;
286           else                  *bap  = value;
287           goto noinsert;
288         }
289       }
290       if (nonew) goto noinsert;
291       if (nrow >= rmax) {
292         /* there is no extra room in row, therefore enlarge */
293         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
294         Scalar *new_a;
295 
296         /* malloc new storage space */
297         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
298         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
299         new_j   = (int *) (new_a + bs2*new_nz);
300         new_i   = new_j + new_nz;
301 
302         /* copy over old data into new slots */
303         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
304         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
305         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
306         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
307         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
308                                                            len*sizeof(int));
309         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
310         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
311         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
312                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
313         /* free up old matrix storage */
314         PetscFree(a->a);
315         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
316         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
317         a->singlemalloc = 1;
318 
319         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
320         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
321         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
322         a->maxnz += CHUNKSIZE;
323         a->reallocs++;
324         a->nz++;
325       }
326       N = nrow++ - 1;
327       /* shift up all the later entries in this row */
328       for ( ii=N; ii>=i; ii-- ) {
329         rp[ii+1] = rp[ii];
330          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
331       }
332       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
333       rp[i] = bcol;
334       ap[bs2*i + bs*cidx + ridx] = value;
335       noinsert:;
336       low = i;
337     }
338     ailen[brow] = nrow;
339   }
340   return 0;
341 }
342 
343 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
344 {
345   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
346   *m = a->m; *n = a->n;
347   return 0;
348 }
349 
350 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
351 {
352   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
353   *m = 0; *n = a->m;
354   return 0;
355 }
356 
357 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
358 {
359   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
360   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
361   Scalar      *aa,*v_i,*aa_i;
362 
363   bs  = a->bs;
364   ai  = a->i;
365   aj  = a->j;
366   aa  = a->a;
367   bs2 = a->bs2;
368 
369   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
370 
371   bn  = row/bs;   /* Block number */
372   bp  = row % bs; /* Block Position */
373   M   = ai[bn+1] - ai[bn];
374   *nz = bs*M;
375 
376   if (v) {
377     *v = 0;
378     if (*nz) {
379       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
380       for ( i=0; i<M; i++ ) { /* for each block in the block row */
381         v_i  = *v + i*bs;
382         aa_i = aa + bs2*(ai[bn] + i);
383         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
384       }
385     }
386   }
387 
388   if (idx) {
389     *idx = 0;
390     if (*nz) {
391       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
392       for ( i=0; i<M; i++ ) { /* for each block in the block row */
393         idx_i = *idx + i*bs;
394         itmp  = bs*aj[ai[bn] + i];
395         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
396       }
397     }
398   }
399   return 0;
400 }
401 
402 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
403 {
404   if (idx) {if (*idx) PetscFree(*idx);}
405   if (v)   {if (*v)   PetscFree(*v);}
406   return 0;
407 }
408 
409 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
410 {
411   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
412   Mat         C;
413   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
414   int         *rows,*cols,bs2=a->bs2;
415   Scalar      *array=a->a;
416 
417   if (B==PETSC_NULL && mbs!=nbs)
418     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
419   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
420   PetscMemzero(col,(1+nbs)*sizeof(int));
421 
422   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
423   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
424   PetscFree(col);
425   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
426   cols = rows + bs;
427   for ( i=0; i<mbs; i++ ) {
428     cols[0] = i*bs;
429     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
430     len = ai[i+1] - ai[i];
431     for ( j=0; j<len; j++ ) {
432       rows[0] = (*aj++)*bs;
433       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
434       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
435       array += bs2;
436     }
437   }
438   PetscFree(rows);
439 
440   ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
441   ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
442 
443   if (B != PETSC_NULL) {
444     *B = C;
445   } else {
446     /* This isn't really an in-place transpose */
447     PetscFree(a->a);
448     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
449     if (a->diag) PetscFree(a->diag);
450     if (a->ilen) PetscFree(a->ilen);
451     if (a->imax) PetscFree(a->imax);
452     if (a->solve_work) PetscFree(a->solve_work);
453     PetscFree(a);
454     PetscMemcpy(A,C,sizeof(struct _Mat));
455     PetscHeaderDestroy(C);
456   }
457   return 0;
458 }
459 
460 
461 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
462 {
463   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
464   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
465   int        m = a->m,*ip, N, *ailen = a->ilen;
466   int        mbs = a->mbs, bs2 = a->bs2;
467   Scalar     *aa = a->a, *ap;
468 
469   if (mode == FLUSH_ASSEMBLY) return 0;
470 
471   for ( i=1; i<mbs; i++ ) {
472     /* move each row back by the amount of empty slots (fshift) before it*/
473     fshift += imax[i-1] - ailen[i-1];
474     if (fshift) {
475       ip = aj + ai[i]; ap = aa + bs2*ai[i];
476       N = ailen[i];
477       for ( j=0; j<N; j++ ) {
478         ip[j-fshift] = ip[j];
479         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
480       }
481     }
482     ai[i] = ai[i-1] + ailen[i-1];
483   }
484   if (mbs) {
485     fshift += imax[mbs-1] - ailen[mbs-1];
486     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
487   }
488   /* reset ilen and imax for each row */
489   for ( i=0; i<mbs; i++ ) {
490     ailen[i] = imax[i] = ai[i+1] - ai[i];
491   }
492   a->nz = ai[mbs];
493 
494   /* diagonals may have moved, so kill the diagonal pointers */
495   if (fshift && a->diag) {
496     PetscFree(a->diag);
497     PLogObjectMemory(A,-(m+1)*sizeof(int));
498     a->diag = 0;
499   }
500   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space(blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs);
501   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n",
502            a->reallocs);
503   return 0;
504 }
505 
506 static int MatZeroEntries_SeqBAIJ(Mat A)
507 {
508   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
509   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
510   return 0;
511 }
512 
513 int MatDestroy_SeqBAIJ(PetscObject obj)
514 {
515   Mat         A  = (Mat) obj;
516   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
517 
518 #if defined(PETSC_LOG)
519   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
520 #endif
521   PetscFree(a->a);
522   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
523   if (a->diag) PetscFree(a->diag);
524   if (a->ilen) PetscFree(a->ilen);
525   if (a->imax) PetscFree(a->imax);
526   if (a->solve_work) PetscFree(a->solve_work);
527   if (a->mult_work) PetscFree(a->mult_work);
528   PetscFree(a);
529   PLogObjectDestroy(A);
530   PetscHeaderDestroy(A);
531   return 0;
532 }
533 
534 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
535 {
536   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
537   if      (op == ROW_ORIENTED)              a->roworiented = 1;
538   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
539   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
540   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
541   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
542   else if (op == ROWS_SORTED ||
543            op == SYMMETRIC_MATRIX ||
544            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
545            op == YES_NEW_DIAGONALS)
546     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
547   else if (op == NO_NEW_DIAGONALS)
548     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
549   else
550     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
551   return 0;
552 }
553 
554 
555 /* -------------------------------------------------------*/
556 /* Should check that shapes of vectors and matrices match */
557 /* -------------------------------------------------------*/
558 #include "pinclude/plapack.h"
559 
560 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec zz)
561 {
562   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
563   Scalar          *xg,*zg;
564   register Scalar *x,*z,*v,sum,*xb,sum1,sum2,sum3,sum4,sum5;
565   register Scalar x1,x2,x3,x4,x5;
566   int             mbs=a->mbs,i,*idx,*ii;
567   int             bs=a->bs,j,n,bs2=a->bs2,ierr;
568 
569   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
570   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
571 
572   idx   = a->j;
573   v     = a->a;
574   ii    = a->i;
575 
576   switch (bs) {
577   case 1:
578     for ( i=0; i<mbs; i++ ) {
579       n    = ii[1] - ii[0]; ii++;
580       sum  = 0.0;
581       while (n--) sum += *v++ * x[*idx++];
582       z[i] = sum;
583     }
584     break;
585   case 2:
586     for ( i=0; i<mbs; i++ ) {
587       n  = ii[1] - ii[0]; ii++;
588       sum1 = 0.0; sum2 = 0.0;
589       for ( j=0; j<n; j++ ) {
590         xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
591         sum1 += v[0]*x1 + v[2]*x2;
592         sum2 += v[1]*x1 + v[3]*x2;
593         v += 4;
594       }
595       z[0] = sum1; z[1] = sum2;
596       z += 2;
597     }
598     break;
599   case 3:
600     for ( i=0; i<mbs; i++ ) {
601       n  = ii[1] - ii[0]; ii++;
602       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
603       for ( j=0; j<n; j++ ) {
604         xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
605         sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
606         sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
607         sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
608         v += 9;
609       }
610       z[0] = sum1; z[1] = sum2; z[2] = sum3;
611       z += 3;
612     }
613     break;
614   case 4:
615     for ( i=0; i<mbs; i++ ) {
616       n  = ii[1] - ii[0]; ii++;
617       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
618       for ( j=0; j<n; j++ ) {
619         xb = x + 4*(*idx++);
620         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
621         sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
622         sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
623         sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
624         sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
625         v += 16;
626       }
627       z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
628       z += 4;
629     }
630     break;
631   case 5:
632     for ( i=0; i<mbs; i++ ) {
633       n  = ii[1] - ii[0]; ii++;
634       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
635       for ( j=0; j<n; j++ ) {
636         xb = x + 5*(*idx++);
637         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
638         sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
639         sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
640         sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
641         sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
642         sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
643         v += 25;
644       }
645       z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
646       z += 5;
647     }
648     break;
649     /* block sizes larger then 5 by 5 are handled by BLAS */
650   default: {
651       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt, _DZero = 0.0;
652       if (!a->mult_work) {
653         k = PetscMax(a->m,a->n);
654         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
655         CHKPTRQ(a->mult_work);
656       }
657       work = a->mult_work;
658       for ( i=0; i<mbs; i++ ) {
659         n     = ii[1] - ii[0]; ii++;
660         ncols = n*bs;
661         workt = work;
662         for ( j=0; j<n; j++ ) {
663           xb = x + bs*(*idx++);
664           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
665           workt += bs;
666         }
667         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
668         v += n*bs2;
669         z += bs;
670       }
671     }
672   }
673   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
674   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
675   PLogFlops(2*a->nz*bs2 - a->m);
676   return 0;
677 }
678 
679 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
680 {
681   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
682   Scalar          *xg,*zg,*zb;
683   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
684   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval;
685   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
686 
687 
688   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
689   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
690 
691   idx   = a->j;
692   v     = a->a;
693   ii    = a->i;
694 
695   switch (bs) {
696   case 1:
697     for ( i=0; i<mbs; i++ ) {
698       n  = ii[1] - ii[0]; ii++;
699       xb = x + i; x1 = xb[0];
700       ib = idx + ai[i];
701       for ( j=0; j<n; j++ ) {
702         z[ib[j]] = *v++ * x1;
703       }
704     }
705     break;
706   case 2:
707     for ( i=0; i<mbs; i++ ) {
708       n  = ii[1] - ii[0]; ii++;
709       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
710       ib = idx + ai[i];
711       for ( j=0; j<n; j++ ) {
712         rval = ib[j]*2;
713         z[rval++] = v[0]*x1 + v[1]*x2;
714         z[rval++] = v[2]*x1 + v[3]*x2;
715         v += 4;
716       }
717     }
718     break;
719   case 3:
720     for ( i=0; i<mbs; i++ ) {
721       n  = ii[1] - ii[0]; ii++;
722       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
723       ib = idx + ai[i];
724       for ( j=0; j<n; j++ ) {
725         rval = ib[j]*3;
726         z[rval++] = v[0]*x1 + v[1]*x2 + v[2]*x3;
727         z[rval++] = v[3]*x1 + v[4]*x2 + v[5]*x3;
728         z[rval++] = v[6]*x1 + v[7]*x2 + v[8]*x3;
729         v += 9;
730       }
731     }
732     break;
733   case 4:
734     for ( i=0; i<mbs; i++ ) {
735       n  = ii[1] - ii[0]; ii++;
736       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
737       ib = idx + ai[i];
738       for ( j=0; j<n; j++ ) {
739         rval = ib[j]*4;
740         z[rval++] =  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
741         z[rval++] =  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
742         z[rval++] =  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
743         z[rval++] = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
744         v += 16;
745       }
746     }
747     break;
748   case 5:
749     for ( i=0; i<mbs; i++ ) {
750       n  = ii[1] - ii[0]; ii++;
751       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
752       x4 = xb[3];   x5 = xb[4];
753       ib = idx + ai[i];
754       for ( j=0; j<n; j++ ) {
755         rval = ib[j]*5;
756         z[rval++] =  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
757         z[rval++] =  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
758         z[rval++] = v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
759         z[rval++] = v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
760         z[rval++] = v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
761         v += 25;
762       }
763     }
764     break;
765       /* block sizes larger then 5 by 5 are handled by BLAS */
766     default: {
767       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
768       if (!a->mult_work) {
769         k = PetscMax(a->m,a->n);
770         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
771         CHKPTRQ(a->mult_work);
772       }
773       work = a->mult_work;
774       for ( i=0; i<mbs; i++ ) {
775         n     = ii[1] - ii[0]; ii++;
776         ncols = n*bs;
777         PetscMemzero(work,ncols*sizeof(Scalar));
778         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
779         v += n*bs2;
780         x += bs;
781         workt = work;
782         for ( j=0; j<n; j++ ) {
783           zb = z + bs*(*idx++);
784           for ( k=0; k<bs; k++ ) zb[k] = workt[k] ;
785           workt += bs;
786         }
787       }
788     }
789   }
790   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
791   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
792   return 0;
793 }
794 
795 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
796 {
797   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
798   if (nz)  *nz  = a->bs2*a->nz;
799   if (nza) *nza = a->maxnz;
800   if (mem) *mem = (int)A->mem;
801   return 0;
802 }
803 
804 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
805 {
806   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
807 
808   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
809 
810   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
811   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
812       (a->nz != b->nz)) {
813     *flg = PETSC_FALSE; return 0;
814   }
815 
816   /* if the a->i are the same */
817   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
818     *flg = PETSC_FALSE; return 0;
819   }
820 
821   /* if a->j are the same */
822   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
823     *flg = PETSC_FALSE; return 0;
824   }
825 
826   /* if a->a are the same */
827   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
828     *flg = PETSC_FALSE; return 0;
829   }
830   *flg = PETSC_TRUE;
831   return 0;
832 
833 }
834 
835 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
836 {
837   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
838   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
839   Scalar      *x, zero = 0.0,*aa,*aa_j;
840 
841   bs   = a->bs;
842   aa   = a->a;
843   ai   = a->i;
844   aj   = a->j;
845   ambs = a->mbs;
846   bs2  = a->bs2;
847 
848   VecSet(&zero,v);
849   VecGetArray(v,&x); VecGetLocalSize(v,&n);
850   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
851   for ( i=0; i<ambs; i++ ) {
852     for ( j=ai[i]; j<ai[i+1]; j++ ) {
853       if (aj[j] == i) {
854         row  = i*bs;
855         aa_j = aa+j*bs2;
856         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
857         break;
858       }
859     }
860   }
861   return 0;
862 }
863 
864 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
865 {
866   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
867   Scalar      *l,*r,x,*v,*aa,*li,*ri;
868   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
869 
870   ai  = a->i;
871   aj  = a->j;
872   aa  = a->a;
873   m   = a->m;
874   n   = a->n;
875   bs  = a->bs;
876   mbs = a->mbs;
877   bs2 = a->bs2;
878   if (ll) {
879     VecGetArray(ll,&l); VecGetSize(ll,&lm);
880     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
881     for ( i=0; i<mbs; i++ ) { /* for each block row */
882       M  = ai[i+1] - ai[i];
883       li = l + i*bs;
884       v  = aa + bs2*ai[i];
885       for ( j=0; j<M; j++ ) { /* for each block */
886         for ( k=0; k<bs2; k++ ) {
887           (*v++) *= li[k%bs];
888         }
889       }
890     }
891   }
892 
893   if (rr) {
894     VecGetArray(rr,&r); VecGetSize(rr,&rn);
895     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
896     for ( i=0; i<mbs; i++ ) { /* for each block row */
897       M  = ai[i+1] - ai[i];
898       v  = aa + bs2*ai[i];
899       for ( j=0; j<M; j++ ) { /* for each block */
900         ri = r + bs*aj[ai[i]+j];
901         for ( k=0; k<bs; k++ ) {
902           x = ri[k];
903           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
904         }
905       }
906     }
907   }
908   return 0;
909 }
910 
911 
912 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
913 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
914 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
915 
916 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
917 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
918 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
919 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
920 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
921 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
922 
923 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
924 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
925 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
926 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
927 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
928 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
929 
930 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
931 {
932   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
933   Scalar      *v = a->a;
934   double      sum = 0.0;
935   int         i,nz=a->nz,bs2=a->bs2;
936 
937   if (type == NORM_FROBENIUS) {
938     for (i=0; i< bs2*nz; i++ ) {
939 #if defined(PETSC_COMPLEX)
940       sum += real(conj(*v)*(*v)); v++;
941 #else
942       sum += (*v)*(*v); v++;
943 #endif
944     }
945     *norm = sqrt(sum);
946   }
947   else {
948     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
949   }
950   return 0;
951 }
952 
953 /*
954      note: This can only work for identity for row and col. It would
955    be good to check this and otherwise generate an error.
956 */
957 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
958 {
959   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
960   Mat         outA;
961   int         ierr;
962 
963   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
964 
965   outA          = inA;
966   inA->factor   = FACTOR_LU;
967   a->row        = row;
968   a->col        = col;
969 
970   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
971 
972   if (!a->diag) {
973     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
974   }
975   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
976   return 0;
977 }
978 
979 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
980 {
981   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
982   int         one = 1, totalnz = a->bs2*a->nz;
983   BLscal_( &totalnz, alpha, a->a, &one );
984   PLogFlops(totalnz);
985   return 0;
986 }
987 
988 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
989 {
990   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
991   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
992   int        *ai = a->i, *ailen = a->ilen;
993   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
994   Scalar     *ap, *aa = a->a, zero = 0.0;
995 
996   for ( k=0; k<m; k++ ) { /* loop over rows */
997     row  = im[k]; brow = row/bs;
998     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
999     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1000     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1001     nrow = ailen[brow];
1002     for ( l=0; l<n; l++ ) { /* loop over columns */
1003       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1004       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1005       col  = in[l] ;
1006       bcol = col/bs;
1007       cidx = col%bs;
1008       ridx = row%bs;
1009       high = nrow;
1010       low  = 0; /* assume unsorted */
1011       while (high-low > 5) {
1012         t = (low+high)/2;
1013         if (rp[t] > bcol) high = t;
1014         else             low  = t;
1015       }
1016       for ( i=low; i<high; i++ ) {
1017         if (rp[i] > bcol) break;
1018         if (rp[i] == bcol) {
1019           *v++ = ap[bs2*i+bs*cidx+ridx];
1020           goto finished;
1021         }
1022       }
1023       *v++ = zero;
1024       finished:;
1025     }
1026   }
1027   return 0;
1028 }
1029 
1030 /* -------------------------------------------------------------------*/
1031 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1032        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1033        MatMult_SeqBAIJ,0,
1034        MatMultTrans_SeqBAIJ,0,
1035        MatSolve_SeqBAIJ_N,0,
1036        0,0,
1037        MatLUFactor_SeqBAIJ,0,
1038        0,
1039        MatTranspose_SeqBAIJ,
1040        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1041        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1042        0,MatAssemblyEnd_SeqBAIJ,
1043        0,
1044        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
1045        MatGetReordering_SeqBAIJ,
1046        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1047        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1048        MatILUFactorSymbolic_SeqBAIJ,0,
1049        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1050        0,0,
1051        MatConvertSameType_SeqBAIJ,0,0,
1052        MatILUFactor_SeqBAIJ,0,0,
1053        0,MatIncreaseOverlap_SeqBAIJ,
1054        MatGetValues_SeqBAIJ,0,
1055        0,MatScale_SeqBAIJ,
1056        0};
1057 
1058 /*@C
1059    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1060    compressed row) format.  For good matrix assembly performance the
1061    user should preallocate the matrix storage by setting the parameter nz
1062    (or nzz).  By setting these parameters accurately, performance can be
1063    increased by more than a factor of 50.
1064 
1065    Input Parameters:
1066 .  comm - MPI communicator, set to MPI_COMM_SELF
1067 .  bs - size of block
1068 .  m - number of rows
1069 .  n - number of columns
1070 .  nz - number of block nonzeros per block row (same for all rows)
1071 .  nzz - number of block nonzeros per block row or PETSC_NULL
1072          (possibly different for each row)
1073 
1074    Output Parameter:
1075 .  A - the matrix
1076 
1077    Notes:
1078    The block AIJ format is fully compatible with standard Fortran 77
1079    storage.  That is, the stored row and column indices can begin at
1080    either one (as in Fortran) or zero.  See the users' manual for details.
1081 
1082    Specify the preallocated storage with either nz or nnz (not both).
1083    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1084    allocation.  For additional details, see the users manual chapter on
1085    matrices and the file $(PETSC_DIR)/Performance.
1086 
1087 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1088 @*/
1089 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1090 {
1091   Mat         B;
1092   Mat_SeqBAIJ *b;
1093   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1094 
1095   if (mbs*bs!=m || nbs*bs!=n)
1096     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1097 
1098   *A = 0;
1099   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1100   PLogObjectCreate(B);
1101   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1102   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1103   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1104   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1105   if (!flg) {
1106     switch (bs) {
1107       case 1:
1108         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1109         B->ops.solve           = MatSolve_SeqBAIJ_1;
1110         break;
1111       case 2:
1112         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1113         B->ops.solve           = MatSolve_SeqBAIJ_2;
1114         break;
1115       case 3:
1116         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1117         B->ops.solve           = MatSolve_SeqBAIJ_3;
1118         break;
1119       case 4:
1120         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1121         B->ops.solve           = MatSolve_SeqBAIJ_4;
1122         break;
1123       case 5:
1124         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1125         B->ops.solve           = MatSolve_SeqBAIJ_5;
1126         break;
1127     }
1128   }
1129   B->destroy          = MatDestroy_SeqBAIJ;
1130   B->view             = MatView_SeqBAIJ;
1131   B->factor           = 0;
1132   B->lupivotthreshold = 1.0;
1133   b->row              = 0;
1134   b->col              = 0;
1135   b->reallocs         = 0;
1136 
1137   b->m       = m; B->m = m; B->M = m;
1138   b->n       = n; B->n = n; B->N = n;
1139   b->mbs     = mbs;
1140   b->nbs     = nbs;
1141   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1142   if (nnz == PETSC_NULL) {
1143     if (nz == PETSC_DEFAULT) nz = 5;
1144     else if (nz <= 0)        nz = 1;
1145     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1146     nz = nz*mbs;
1147   }
1148   else {
1149     nz = 0;
1150     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1151   }
1152 
1153   /* allocate the matrix space */
1154   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1155   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1156   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1157   b->j  = (int *) (b->a + nz*bs2);
1158   PetscMemzero(b->j,nz*sizeof(int));
1159   b->i  = b->j + nz;
1160   b->singlemalloc = 1;
1161 
1162   b->i[0] = 0;
1163   for (i=1; i<mbs+1; i++) {
1164     b->i[i] = b->i[i-1] + b->imax[i-1];
1165   }
1166 
1167   /* b->ilen will count nonzeros in each block row so far. */
1168   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1169   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1170   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1171 
1172   b->bs               = bs;
1173   b->bs2              = bs2;
1174   b->mbs              = mbs;
1175   b->nz               = 0;
1176   b->maxnz            = nz;
1177   b->sorted           = 0;
1178   b->roworiented      = 1;
1179   b->nonew            = 0;
1180   b->diag             = 0;
1181   b->solve_work       = 0;
1182   b->mult_work        = 0;
1183   b->spptr            = 0;
1184   *A = B;
1185   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1186   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1187   return 0;
1188 }
1189 
1190 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1191 {
1192   Mat         C;
1193   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1194   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1195 
1196   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1197 
1198   *B = 0;
1199   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1200   PLogObjectCreate(C);
1201   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1202   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1203   C->destroy    = MatDestroy_SeqBAIJ;
1204   C->view       = MatView_SeqBAIJ;
1205   C->factor     = A->factor;
1206   c->row        = 0;
1207   c->col        = 0;
1208   C->assembled  = PETSC_TRUE;
1209 
1210   c->m = C->m   = a->m;
1211   c->n = C->n   = a->n;
1212   C->M          = a->m;
1213   C->N          = a->n;
1214 
1215   c->bs         = a->bs;
1216   c->bs2        = a->bs2;
1217   c->mbs        = a->mbs;
1218   c->nbs        = a->nbs;
1219 
1220   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1221   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1222   for ( i=0; i<mbs; i++ ) {
1223     c->imax[i] = a->imax[i];
1224     c->ilen[i] = a->ilen[i];
1225   }
1226 
1227   /* allocate the matrix space */
1228   c->singlemalloc = 1;
1229   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1230   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1231   c->j  = (int *) (c->a + nz*bs2);
1232   c->i  = c->j + nz;
1233   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1234   if (mbs > 0) {
1235     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1236     if (cpvalues == COPY_VALUES) {
1237       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1238     }
1239   }
1240 
1241   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1242   c->sorted      = a->sorted;
1243   c->roworiented = a->roworiented;
1244   c->nonew       = a->nonew;
1245 
1246   if (a->diag) {
1247     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1248     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1249     for ( i=0; i<mbs; i++ ) {
1250       c->diag[i] = a->diag[i];
1251     }
1252   }
1253   else c->diag          = 0;
1254   c->nz                 = a->nz;
1255   c->maxnz              = a->maxnz;
1256   c->solve_work         = 0;
1257   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1258   c->mult_work          = 0;
1259   *B = C;
1260   return 0;
1261 }
1262 
1263 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1264 {
1265   Mat_SeqBAIJ  *a;
1266   Mat          B;
1267   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1268   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1269   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1270   int          *masked, nmask,tmp,bs2,ishift;
1271   Scalar       *aa;
1272   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1273 
1274   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1275   bs2  = bs*bs;
1276 
1277   MPI_Comm_size(comm,&size);
1278   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1279   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1280   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1281   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1282   M = header[1]; N = header[2]; nz = header[3];
1283 
1284   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1285 
1286   /*
1287      This code adds extra rows to make sure the number of rows is
1288     divisible by the blocksize
1289   */
1290   mbs        = M/bs;
1291   extra_rows = bs - M + bs*(mbs);
1292   if (extra_rows == bs) extra_rows = 0;
1293   else                  mbs++;
1294   if (extra_rows) {
1295     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
1296   }
1297 
1298   /* read in row lengths */
1299   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1300   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1301   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1302 
1303   /* read in column indices */
1304   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1305   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1306   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1307 
1308   /* loop over row lengths determining block row lengths */
1309   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1310   PetscMemzero(browlengths,mbs*sizeof(int));
1311   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1312   PetscMemzero(mask,mbs*sizeof(int));
1313   masked = mask + mbs;
1314   rowcount = 0; nzcount = 0;
1315   for ( i=0; i<mbs; i++ ) {
1316     nmask = 0;
1317     for ( j=0; j<bs; j++ ) {
1318       kmax = rowlengths[rowcount];
1319       for ( k=0; k<kmax; k++ ) {
1320         tmp = jj[nzcount++]/bs;
1321         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1322       }
1323       rowcount++;
1324     }
1325     browlengths[i] += nmask;
1326     /* zero out the mask elements we set */
1327     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1328   }
1329 
1330   /* create our matrix */
1331   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1332          CHKERRQ(ierr);
1333   B = *A;
1334   a = (Mat_SeqBAIJ *) B->data;
1335 
1336   /* set matrix "i" values */
1337   a->i[0] = 0;
1338   for ( i=1; i<= mbs; i++ ) {
1339     a->i[i]      = a->i[i-1] + browlengths[i-1];
1340     a->ilen[i-1] = browlengths[i-1];
1341   }
1342   a->nz         = 0;
1343   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1344 
1345   /* read in nonzero values */
1346   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1347   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
1348   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1349 
1350   /* set "a" and "j" values into matrix */
1351   nzcount = 0; jcount = 0;
1352   for ( i=0; i<mbs; i++ ) {
1353     nzcountb = nzcount;
1354     nmask    = 0;
1355     for ( j=0; j<bs; j++ ) {
1356       kmax = rowlengths[i*bs+j];
1357       for ( k=0; k<kmax; k++ ) {
1358         tmp = jj[nzcount++]/bs;
1359 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1360       }
1361       rowcount++;
1362     }
1363     /* sort the masked values */
1364     PetscSortInt(nmask,masked);
1365 
1366     /* set "j" values into matrix */
1367     maskcount = 1;
1368     for ( j=0; j<nmask; j++ ) {
1369       a->j[jcount++]  = masked[j];
1370       mask[masked[j]] = maskcount++;
1371     }
1372     /* set "a" values into matrix */
1373     ishift = bs2*a->i[i];
1374     for ( j=0; j<bs; j++ ) {
1375       kmax = rowlengths[i*bs+j];
1376       for ( k=0; k<kmax; k++ ) {
1377         tmp    = jj[nzcountb]/bs ;
1378         block  = mask[tmp] - 1;
1379         point  = jj[nzcountb] - bs*tmp;
1380         idx    = ishift + bs2*block + j + bs*point;
1381         a->a[idx] = aa[nzcountb++];
1382       }
1383     }
1384     /* zero out the mask elements we set */
1385     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1386   }
1387   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1388 
1389   PetscFree(rowlengths);
1390   PetscFree(browlengths);
1391   PetscFree(aa);
1392   PetscFree(jj);
1393   PetscFree(mask);
1394 
1395   B->assembled = PETSC_TRUE;
1396 
1397   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1398   if (flg) {
1399     Viewer tviewer;
1400     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1401     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
1402     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1403     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1404   }
1405   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1406   if (flg) {
1407     Viewer tviewer;
1408     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1409     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
1410     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1411     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1412   }
1413   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1414   if (flg) {
1415     Viewer tviewer;
1416     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1417     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1418     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1419   }
1420   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1421   if (flg) {
1422     Viewer tviewer;
1423     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1424     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
1425     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1426     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1427   }
1428   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1429   if (flg) {
1430     Viewer tviewer;
1431     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
1432     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
1433     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
1434     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1435   }
1436   return 0;
1437 }
1438 
1439 
1440 
1441