xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 1a6a6d987255f88adf325b5bb4a1e46559915718)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.39 1996/04/16 13:48:15 balay Exp bsmith $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "src/vec/vecimpl.h"
12 #include "src/inline/spops.h"
13 #include "petsc.h"
14 
15 extern   int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = 0;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr);
45     ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
46     PetscFree(ia); PetscFree(ja);
47   } else {
48     if (ishift == oshift) {
49       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
50     }
51     else if (ishift == -1) {
52       /* temporarily subtract 1 from i and j indices */
53       int nz = a->i[n] - 1;
54       for ( i=0; i<nz; i++ ) a->j[i]--;
55       for ( i=0; i<n+1; i++ ) a->i[i]--;
56       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
57       for ( i=0; i<nz; i++ ) a->j[i]++;
58       for ( i=0; i<n+1; i++ ) a->i[i]++;
59     } else {
60       /* temporarily add 1 to i and j indices */
61       int nz = a->i[n] - 1;
62       for ( i=0; i<nz; i++ ) a->j[i]++;
63       for ( i=0; i<n+1; i++ ) a->i[i]++;
64       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
65       for ( i=0; i<nz; i++ ) a->j[i]--;
66       for ( i=0; i<n+1; i++ ) a->i[i]--;
67     }
68   }
69   return 0;
70 }
71 
72 /*
73      Adds diagonal pointers to sparse matrix structure.
74 */
75 
76 int MatMarkDiag_SeqBAIJ(Mat A)
77 {
78   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
79   int         i,j, *diag, m = a->mbs;
80 
81   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
82   PLogObjectMemory(A,(m+1)*sizeof(int));
83   for ( i=0; i<m; i++ ) {
84     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
85       if (a->j[j] == i) {
86         diag[i] = j;
87         break;
88       }
89     }
90   }
91   a->diag = diag;
92   return 0;
93 }
94 
95 #include "draw.h"
96 #include "pinclude/pviewer.h"
97 #include "sys.h"
98 
99 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
100 {
101   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
102   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
103   Scalar      *aa;
104 
105   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
106   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
107   col_lens[0] = MAT_COOKIE;
108   col_lens[1] = a->m;
109   col_lens[2] = a->n;
110   col_lens[3] = a->nz*bs2;
111 
112   /* store lengths of each row and write (including header) to file */
113   count = 0;
114   for ( i=0; i<a->mbs; i++ ) {
115     for ( j=0; j<bs; j++ ) {
116       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
117     }
118   }
119   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
120   PetscFree(col_lens);
121 
122   /* store column indices (zero start index) */
123   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
124   count = 0;
125   for ( i=0; i<a->mbs; i++ ) {
126     for ( j=0; j<bs; j++ ) {
127       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
128         for ( l=0; l<bs; l++ ) {
129           jj[count++] = bs*a->j[k] + l;
130         }
131       }
132     }
133   }
134   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
135   PetscFree(jj);
136 
137   /* store nonzero values */
138   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
139   count = 0;
140   for ( i=0; i<a->mbs; i++ ) {
141     for ( j=0; j<bs; j++ ) {
142       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
143         for ( l=0; l<bs; l++ ) {
144           aa[count++] = a->a[bs2*k + l*bs + j];
145         }
146       }
147     }
148   }
149   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
150   PetscFree(aa);
151   return 0;
152 }
153 
154 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
155 {
156   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
157   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
158   FILE        *fd;
159   char        *outputname;
160 
161   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
162   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
163   ierr = ViewerGetFormat(viewer,&format);
164   if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) {
165     fprintf(fd,"  block size is %d\n",bs);
166   }
167   else if (format == ASCII_FORMAT_MATLAB) {
168     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
169   }
170   else if (format == ASCII_FORMAT_COMMON) {
171     for ( i=0; i<a->mbs; i++ ) {
172       for ( j=0; j<bs; j++ ) {
173         fprintf(fd,"row %d:",i*bs+j);
174         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
175           for ( l=0; l<bs; l++ ) {
176 #if defined(PETSC_COMPLEX)
177           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
178             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
179               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
180           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
181             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
182 #else
183           if (a->a[bs2*k + l*bs + j] != 0.0)
184             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
185 #endif
186           }
187         }
188         fprintf(fd,"\n");
189       }
190     }
191   }
192   else {
193     for ( i=0; i<a->mbs; i++ ) {
194       for ( j=0; j<bs; j++ ) {
195         fprintf(fd,"row %d:",i*bs+j);
196         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
197           for ( l=0; l<bs; l++ ) {
198 #if defined(PETSC_COMPLEX)
199           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
200             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
201               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
202           }
203           else {
204             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
205           }
206 #else
207             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
208 #endif
209           }
210         }
211         fprintf(fd,"\n");
212       }
213     }
214   }
215   fflush(fd);
216   return 0;
217 }
218 
219 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
220 {
221   Mat         A = (Mat) obj;
222   ViewerType  vtype;
223   int         ierr;
224 
225   if (!viewer) {
226     viewer = STDOUT_VIEWER_SELF;
227   }
228 
229   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
230   if (vtype == MATLAB_VIEWER) {
231     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
232   }
233   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
234     return MatView_SeqBAIJ_ASCII(A,viewer);
235   }
236   else if (vtype == BINARY_FILE_VIEWER) {
237     return MatView_SeqBAIJ_Binary(A,viewer);
238   }
239   else if (vtype == DRAW_VIEWER) {
240     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
241   }
242   return 0;
243 }
244 
245 #define CHUNKSIZE  10
246 
247 /* This version has row oriented v  */
248 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
249 {
250   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
251   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
252   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
253   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
254   int          ridx,cidx,bs2=a->bs2;
255   Scalar      *ap,value,*aa=a->a,*bap;
256 
257   for ( k=0; k<m; k++ ) { /* loop over added rows */
258     row  = im[k]; brow = row/bs;
259     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
260     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
261     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
262     rmax = imax[brow]; nrow = ailen[brow];
263     low = 0;
264     for ( l=0; l<n; l++ ) { /* loop over added columns */
265       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
266       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
267       col = in[l]; bcol = col/bs;
268       ridx = row % bs; cidx = col % bs;
269       if (roworiented) {
270         value = *v++;
271       }
272       else {
273         value = v[k + l*m];
274       }
275       if (!sorted) low = 0; high = nrow;
276       while (high-low > 5) {
277         t = (low+high)/2;
278         if (rp[t] > bcol) high = t;
279         else             low  = t;
280       }
281       for ( i=low; i<high; i++ ) {
282         if (rp[i] > bcol) break;
283         if (rp[i] == bcol) {
284           bap  = ap +  bs2*i + bs*cidx + ridx;
285           if (is == ADD_VALUES) *bap += value;
286           else                  *bap  = value;
287           goto noinsert;
288         }
289       }
290       if (nonew) goto noinsert;
291       if (nrow >= rmax) {
292         /* there is no extra room in row, therefore enlarge */
293         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
294         Scalar *new_a;
295 
296         /* malloc new storage space */
297         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
298         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
299         new_j   = (int *) (new_a + bs2*new_nz);
300         new_i   = new_j + new_nz;
301 
302         /* copy over old data into new slots */
303         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
304         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
305         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
306         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
307         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
308                                                            len*sizeof(int));
309         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
310         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
311         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
312                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
313         /* free up old matrix storage */
314         PetscFree(a->a);
315         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
316         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
317         a->singlemalloc = 1;
318 
319         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
320         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
321         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
322         a->maxnz += CHUNKSIZE;
323         a->reallocs++;
324         a->nz++;
325       }
326       N = nrow++ - 1;
327       /* shift up all the later entries in this row */
328       for ( ii=N; ii>=i; ii-- ) {
329         rp[ii+1] = rp[ii];
330          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
331       }
332       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
333       rp[i] = bcol;
334       ap[bs2*i + bs*cidx + ridx] = value;
335       noinsert:;
336       low = i;
337     }
338     ailen[brow] = nrow;
339   }
340   return 0;
341 }
342 
343 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
344 {
345   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
346   *m = a->m; *n = a->n;
347   return 0;
348 }
349 
350 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
351 {
352   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
353   *m = 0; *n = a->m;
354   return 0;
355 }
356 
357 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
358 {
359   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
360   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
361   Scalar      *aa,*v_i,*aa_i;
362 
363   bs  = a->bs;
364   ai  = a->i;
365   aj  = a->j;
366   aa  = a->a;
367   bs2 = a->bs2;
368 
369   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
370 
371   bn  = row/bs;   /* Block number */
372   bp  = row % bs; /* Block Position */
373   M   = ai[bn+1] - ai[bn];
374   *nz = bs*M;
375 
376   if (v) {
377     *v = 0;
378     if (*nz) {
379       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
380       for ( i=0; i<M; i++ ) { /* for each block in the block row */
381         v_i  = *v + i*bs;
382         aa_i = aa + bs2*(ai[bn] + i);
383         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
384       }
385     }
386   }
387 
388   if (idx) {
389     *idx = 0;
390     if (*nz) {
391       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
392       for ( i=0; i<M; i++ ) { /* for each block in the block row */
393         idx_i = *idx + i*bs;
394         itmp  = bs*aj[ai[bn] + i];
395         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
396       }
397     }
398   }
399   return 0;
400 }
401 
402 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
403 {
404   if (idx) {if (*idx) PetscFree(*idx);}
405   if (v)   {if (*v)   PetscFree(*v);}
406   return 0;
407 }
408 
409 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
410 {
411   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
412   Mat         C;
413   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
414   int         *rows,*cols,bs2=a->bs2;
415   Scalar      *array=a->a;
416 
417   if (B==PETSC_NULL && mbs!=nbs)
418     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
419   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
420   PetscMemzero(col,(1+nbs)*sizeof(int));
421 
422   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
423   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
424   PetscFree(col);
425   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
426   cols = rows + bs;
427   for ( i=0; i<mbs; i++ ) {
428     cols[0] = i*bs;
429     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
430     len = ai[i+1] - ai[i];
431     for ( j=0; j<len; j++ ) {
432       rows[0] = (*aj++)*bs;
433       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
434       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
435       array += bs2;
436     }
437   }
438   PetscFree(rows);
439 
440   ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
441   ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
442 
443   if (B != PETSC_NULL) {
444     *B = C;
445   } else {
446     /* This isn't really an in-place transpose */
447     PetscFree(a->a);
448     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
449     if (a->diag) PetscFree(a->diag);
450     if (a->ilen) PetscFree(a->ilen);
451     if (a->imax) PetscFree(a->imax);
452     if (a->solve_work) PetscFree(a->solve_work);
453     PetscFree(a);
454     PetscMemcpy(A,C,sizeof(struct _Mat));
455     PetscHeaderDestroy(C);
456   }
457   return 0;
458 }
459 
460 
461 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
462 {
463   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
464   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
465   int        m = a->m,*ip, N, *ailen = a->ilen;
466   int        mbs = a->mbs, bs2 = a->bs2;
467   Scalar     *aa = a->a, *ap;
468 
469   if (mode == FLUSH_ASSEMBLY) return 0;
470 
471   for ( i=1; i<mbs; i++ ) {
472     /* move each row back by the amount of empty slots (fshift) before it*/
473     fshift += imax[i-1] - ailen[i-1];
474     if (fshift) {
475       ip = aj + ai[i]; ap = aa + bs2*ai[i];
476       N = ailen[i];
477       for ( j=0; j<N; j++ ) {
478         ip[j-fshift] = ip[j];
479         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
480       }
481     }
482     ai[i] = ai[i-1] + ailen[i-1];
483   }
484   if (mbs) {
485     fshift += imax[mbs-1] - ailen[mbs-1];
486     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
487   }
488   /* reset ilen and imax for each row */
489   for ( i=0; i<mbs; i++ ) {
490     ailen[i] = imax[i] = ai[i+1] - ai[i];
491   }
492   a->nz = ai[mbs];
493 
494   /* diagonals may have moved, so kill the diagonal pointers */
495   if (fshift && a->diag) {
496     PetscFree(a->diag);
497     PLogObjectMemory(A,-(m+1)*sizeof(int));
498     a->diag = 0;
499   }
500   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space(blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs);
501   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n",
502            a->reallocs);
503   return 0;
504 }
505 
506 static int MatZeroEntries_SeqBAIJ(Mat A)
507 {
508   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
509   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
510   return 0;
511 }
512 
513 int MatDestroy_SeqBAIJ(PetscObject obj)
514 {
515   Mat         A  = (Mat) obj;
516   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
517 
518 #if defined(PETSC_LOG)
519   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
520 #endif
521   PetscFree(a->a);
522   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
523   if (a->diag) PetscFree(a->diag);
524   if (a->ilen) PetscFree(a->ilen);
525   if (a->imax) PetscFree(a->imax);
526   if (a->solve_work) PetscFree(a->solve_work);
527   if (a->mult_work) PetscFree(a->mult_work);
528   PetscFree(a);
529   PLogObjectDestroy(A);
530   PetscHeaderDestroy(A);
531   return 0;
532 }
533 
534 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
535 {
536   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
537   if      (op == ROW_ORIENTED)              a->roworiented = 1;
538   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
539   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
540   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
541   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
542   else if (op == ROWS_SORTED ||
543            op == SYMMETRIC_MATRIX ||
544            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
545            op == YES_NEW_DIAGONALS)
546     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
547   else if (op == NO_NEW_DIAGONALS)
548     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
549   else
550     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
551   return 0;
552 }
553 
554 
555 /* -------------------------------------------------------*/
556 /* Should check that shapes of vectors and matrices match */
557 /* -------------------------------------------------------*/
558 #include "pinclude/plapack.h"
559 
560 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec zz)
561 {
562   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
563   Scalar          *xg,*zg;
564   register Scalar *x,*z,*v,sum,*xb,sum1,sum2,sum3,sum4,sum5;
565   register Scalar x1,x2,x3,x4,x5;
566   int             mbs=a->mbs,i,*idx,*ii;
567   int             bs=a->bs,j,n,bs2=a->bs2,ierr;
568 
569   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
570   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
571 
572   idx   = a->j;
573   v     = a->a;
574   ii    = a->i;
575 
576   switch (bs) {
577   case 1:
578     for ( i=0; i<mbs; i++ ) {
579       n    = ii[1] - ii[0]; ii++;
580       sum  = 0.0;
581       while (n--) sum += *v++ * x[*idx++];
582       z[i] = sum;
583     }
584     break;
585   case 2:
586     for ( i=0; i<mbs; i++ ) {
587       n  = ii[1] - ii[0]; ii++;
588       sum1 = 0.0; sum2 = 0.0;
589       for ( j=0; j<n; j++ ) {
590         xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
591         sum1 += v[0]*x1 + v[2]*x2;
592         sum2 += v[1]*x1 + v[3]*x2;
593         v += 4;
594       }
595       z[0] = sum1; z[1] = sum2;
596       z += 2;
597     }
598     break;
599   case 3:
600     for ( i=0; i<mbs; i++ ) {
601       n  = ii[1] - ii[0]; ii++;
602       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
603       for ( j=0; j<n; j++ ) {
604         xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
605         sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
606         sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
607         sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
608         v += 9;
609       }
610       z[0] = sum1; z[1] = sum2; z[2] = sum3;
611       z += 3;
612     }
613     break;
614   case 4:
615     for ( i=0; i<mbs; i++ ) {
616       n  = ii[1] - ii[0]; ii++;
617       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
618       for ( j=0; j<n; j++ ) {
619         xb = x + 4*(*idx++);
620         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
621         sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
622         sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
623         sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
624         sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
625         v += 16;
626       }
627       z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
628       z += 4;
629     }
630     break;
631   case 5:
632     for ( i=0; i<mbs; i++ ) {
633       n  = ii[1] - ii[0]; ii++;
634       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
635       for ( j=0; j<n; j++ ) {
636         xb = x + 5*(*idx++);
637         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
638         sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
639         sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
640         sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
641         sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
642         sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
643         v += 25;
644       }
645       z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
646       z += 5;
647     }
648     break;
649     /* block sizes larger then 5 by 5 are handled by BLAS */
650   default: {
651       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt, _DZero = 0.0;
652       if (!a->mult_work) {
653         k = PetscMax(a->m,a->n);
654         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
655         CHKPTRQ(a->mult_work);
656       }
657       work = a->mult_work;
658       for ( i=0; i<mbs; i++ ) {
659         n     = ii[1] - ii[0]; ii++;
660         ncols = n*bs;
661         workt = work;
662         for ( j=0; j<n; j++ ) {
663           xb = x + bs*(*idx++);
664           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
665           workt += bs;
666         }
667         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
668         v += n*bs2;
669         z += bs;
670       }
671     }
672   }
673   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
674   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
675   PLogFlops(2*a->nz*bs2 - a->m);
676   return 0;
677 }
678 
679 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
680 {
681   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
682   Scalar          *xg,*zg,*zb;
683   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
684   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval;
685   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
686 
687 
688   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
689   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
690 
691   idx   = a->j;
692   v     = a->a;
693   ii    = a->i;
694 
695   switch (bs) {
696   case 1:
697     for ( i=0; i<mbs; i++ ) {
698       n  = ii[1] - ii[0]; ii++;
699       xb = x + i; x1 = xb[0];
700       ib = idx + ai[i];
701       for ( j=0; j<n; j++ ) {
702         z[ib[j]] = *v++ * x1;
703       }
704     }
705     break;
706   case 2:
707     for ( i=0; i<mbs; i++ ) {
708       n  = ii[1] - ii[0]; ii++;
709       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
710       ib = idx + ai[i];
711       for ( j=0; j<n; j++ ) {
712         rval = ib[j]*2;
713         z[rval++] = v[0]*x1 + v[1]*x2;
714         z[rval++] = v[2]*x1 + v[3]*x2;
715         v += 4;
716       }
717     }
718     break;
719   case 3:
720     for ( i=0; i<mbs; i++ ) {
721       n  = ii[1] - ii[0]; ii++;
722       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
723       ib = idx + ai[i];
724       for ( j=0; j<n; j++ ) {
725         rval = ib[j]*3;
726         z[rval++] = v[0]*x1 + v[1]*x2 + v[2]*x3;
727         z[rval++] = v[3]*x1 + v[4]*x2 + v[5]*x3;
728         z[rval++] = v[6]*x1 + v[7]*x2 + v[8]*x3;
729         v += 9;
730       }
731     }
732     break;
733   case 4:
734     for ( i=0; i<mbs; i++ ) {
735       n  = ii[1] - ii[0]; ii++;
736       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
737       ib = idx + ai[i];
738       for ( j=0; j<n; j++ ) {
739         rval = ib[j]*4;
740         z[rval++] =  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
741         z[rval++] =  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
742         z[rval++] =  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
743         z[rval++] = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
744         v += 16;
745       }
746     }
747     break;
748   case 5:
749     for ( i=0; i<mbs; i++ ) {
750       n  = ii[1] - ii[0]; ii++;
751       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
752       x4 = xb[3];   x5 = xb[4];
753       ib = idx + ai[i];
754       for ( j=0; j<n; j++ ) {
755         rval = ib[j]*5;
756         z[rval++] =  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
757         z[rval++] =  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
758         z[rval++] = v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
759         z[rval++] = v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
760         z[rval++] = v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
761         v += 25;
762       }
763     }
764     break;
765       /* block sizes larger then 5 by 5 are handled by BLAS */
766     default: {
767       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
768       if (!a->mult_work) {
769         k = PetscMax(a->m,a->n);
770         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
771         CHKPTRQ(a->mult_work);
772       }
773       work = a->mult_work;
774       for ( i=0; i<mbs; i++ ) {
775         n     = ii[1] - ii[0]; ii++;
776         ncols = n*bs;
777         PetscMemzero(work,ncols*sizeof(Scalar));
778         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
779         v += n*bs2;
780         x += bs;
781         workt = work;
782         for ( j=0; j<n; j++ ) {
783           zb = z + bs*(*idx++);
784           for ( k=0; k<bs; k++ ) zb[k] = workt[k] ;
785           workt += bs;
786         }
787       }
788     }
789   }
790   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
791   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
792   return 0;
793 }
794 
795 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
796 {
797   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
798   if (nz)  *nz  = a->bs2*a->nz;
799   if (nza) *nza = a->maxnz;
800   if (mem) *mem = (int)A->mem;
801   return 0;
802 }
803 
804 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
805 {
806   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
807 
808   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
809 
810   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
811   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
812       (a->nz != b->nz)) {
813     *flg = PETSC_FALSE; return 0;
814   }
815 
816   /* if the a->i are the same */
817   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
818     *flg = PETSC_FALSE; return 0;
819   }
820 
821   /* if a->j are the same */
822   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
823     *flg = PETSC_FALSE; return 0;
824   }
825 
826   /* if a->a are the same */
827   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
828     *flg = PETSC_FALSE; return 0;
829   }
830   *flg = PETSC_TRUE;
831   return 0;
832 
833 }
834 
835 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
836 {
837   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
838   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
839   Scalar      *x, zero = 0.0,*aa,*aa_j;
840 
841   bs   = a->bs;
842   aa   = a->a;
843   ai   = a->i;
844   aj   = a->j;
845   ambs = a->mbs;
846   bs2  = a->bs2;
847 
848   VecSet(&zero,v);
849   VecGetArray(v,&x); VecGetLocalSize(v,&n);
850   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
851   for ( i=0; i<ambs; i++ ) {
852     for ( j=ai[i]; j<ai[i+1]; j++ ) {
853       if (aj[j] == i) {
854         row  = i*bs;
855         aa_j = aa+j*bs2;
856         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
857         break;
858       }
859     }
860   }
861   return 0;
862 }
863 
864 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
865 {
866   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
867   Scalar      *l,*r,x,*v,*aa,*li,*ri;
868   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
869 
870   ai  = a->i;
871   aj  = a->j;
872   aa  = a->a;
873   m   = a->m;
874   n   = a->n;
875   bs  = a->bs;
876   mbs = a->mbs;
877   bs2 = a->bs2;
878   if (ll) {
879     VecGetArray(ll,&l); VecGetSize(ll,&lm);
880     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
881     for ( i=0; i<mbs; i++ ) { /* for each block row */
882       M  = ai[i+1] - ai[i];
883       li = l + i*bs;
884       v  = aa + bs2*ai[i];
885       for ( j=0; j<M; j++ ) { /* for each block */
886         for ( k=0; k<bs2; k++ ) {
887           (*v++) *= li[k%bs];
888         }
889       }
890     }
891   }
892 
893   if (rr) {
894     VecGetArray(rr,&r); VecGetSize(rr,&rn);
895     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
896     for ( i=0; i<mbs; i++ ) { /* for each block row */
897       M  = ai[i+1] - ai[i];
898       v  = aa + bs2*ai[i];
899       for ( j=0; j<M; j++ ) { /* for each block */
900         ri = r + bs*aj[ai[i]+j];
901         for ( k=0; k<bs; k++ ) {
902           x = ri[k];
903           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
904         }
905       }
906     }
907   }
908   return 0;
909 }
910 
911 
912 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
913 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
914 
915 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
916 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
917 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
918 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
919 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
920 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
921 
922 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
923 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
924 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
925 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
926 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
927 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
928 
929 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
930 {
931   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
932   Scalar      *v = a->a;
933   double      sum = 0.0;
934   int         i,nz=a->nz,bs2=a->bs2;
935 
936   if (type == NORM_FROBENIUS) {
937     for (i=0; i< bs2*nz; i++ ) {
938 #if defined(PETSC_COMPLEX)
939       sum += real(conj(*v)*(*v)); v++;
940 #else
941       sum += (*v)*(*v); v++;
942 #endif
943     }
944     *norm = sqrt(sum);
945   }
946   else {
947     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
948   }
949   return 0;
950 }
951 
952 /*
953      note: This can only work for identity for row and col. It would
954    be good to check this and otherwise generate an error.
955 */
956 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
957 {
958   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
959   Mat         outA;
960   int         ierr;
961 
962   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
963 
964   outA          = inA;
965   inA->factor   = FACTOR_LU;
966   a->row        = row;
967   a->col        = col;
968 
969   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
970 
971   if (!a->diag) {
972     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
973   }
974   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
975   return 0;
976 }
977 
978 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
979 {
980   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
981   int         one = 1, totalnz = a->bs2*a->nz;
982   BLscal_( &totalnz, alpha, a->a, &one );
983   PLogFlops(totalnz);
984   return 0;
985 }
986 
987 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
988 {
989   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
990   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
991   int        *ai = a->i, *ailen = a->ilen;
992   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
993   Scalar     *ap, *aa = a->a, zero = 0.0;
994 
995   for ( k=0; k<m; k++ ) { /* loop over rows */
996     row  = im[k]; brow = row/bs;
997     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
998     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
999     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1000     nrow = ailen[brow];
1001     for ( l=0; l<n; l++ ) { /* loop over columns */
1002       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1003       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1004       col  = in[l] ;
1005       bcol = col/bs;
1006       cidx = col%bs;
1007       ridx = row%bs;
1008       high = nrow;
1009       low  = 0; /* assume unsorted */
1010       while (high-low > 5) {
1011         t = (low+high)/2;
1012         if (rp[t] > bcol) high = t;
1013         else             low  = t;
1014       }
1015       for ( i=low; i<high; i++ ) {
1016         if (rp[i] > bcol) break;
1017         if (rp[i] == bcol) {
1018           *v++ = ap[bs2*i+bs*cidx+ridx];
1019           goto finished;
1020         }
1021       }
1022       *v++ = zero;
1023       finished:;
1024     }
1025   }
1026   return 0;
1027 }
1028 
1029 /* -------------------------------------------------------------------*/
1030 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1031        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1032        MatMult_SeqBAIJ,0,
1033        MatMultTrans_SeqBAIJ,0,
1034        MatSolve_SeqBAIJ_N,0,
1035        0,0,
1036        MatLUFactor_SeqBAIJ,0,
1037        0,
1038        MatTranspose_SeqBAIJ,
1039        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1040        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1041        0,MatAssemblyEnd_SeqBAIJ,
1042        0,
1043        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
1044        MatGetReordering_SeqBAIJ,
1045        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1046        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1047        MatILUFactorSymbolic_SeqBAIJ,0,
1048        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1049        0,0,
1050        MatConvertSameType_SeqBAIJ,0,0,
1051        MatILUFactor_SeqBAIJ,0,0,
1052        0,0,
1053        MatGetValues_SeqBAIJ,0,
1054        0,MatScale_SeqBAIJ,
1055        0};
1056 
1057 /*@C
1058    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1059    compressed row) format.  For good matrix assembly performance the
1060    user should preallocate the matrix storage by setting the parameter nz
1061    (or nzz).  By setting these parameters accurately, performance can be
1062    increased by more than a factor of 50.
1063 
1064    Input Parameters:
1065 .  comm - MPI communicator, set to MPI_COMM_SELF
1066 .  bs - size of block
1067 .  m - number of rows
1068 .  n - number of columns
1069 .  nz - number of block nonzeros per block row (same for all rows)
1070 .  nzz - number of block nonzeros per block row or PETSC_NULL
1071          (possibly different for each row)
1072 
1073    Output Parameter:
1074 .  A - the matrix
1075 
1076    Notes:
1077    The block AIJ format is fully compatible with standard Fortran 77
1078    storage.  That is, the stored row and column indices can begin at
1079    either one (as in Fortran) or zero.  See the users' manual for details.
1080 
1081    Specify the preallocated storage with either nz or nnz (not both).
1082    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1083    allocation.  For additional details, see the users manual chapter on
1084    matrices and the file $(PETSC_DIR)/Performance.
1085 
1086 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1087 @*/
1088 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1089 {
1090   Mat         B;
1091   Mat_SeqBAIJ *b;
1092   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1093 
1094   if (mbs*bs!=m || nbs*bs!=n)
1095     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1096 
1097   *A = 0;
1098   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1099   PLogObjectCreate(B);
1100   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1101   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1102   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1103   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1104   if (!flg) {
1105     switch (bs) {
1106       case 1:
1107         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1108         B->ops.solve           = MatSolve_SeqBAIJ_1;
1109         break;
1110       case 2:
1111         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1112         B->ops.solve           = MatSolve_SeqBAIJ_2;
1113         break;
1114       case 3:
1115         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1116         B->ops.solve           = MatSolve_SeqBAIJ_3;
1117         break;
1118       case 4:
1119         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1120         B->ops.solve           = MatSolve_SeqBAIJ_4;
1121         break;
1122       case 5:
1123         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1124         B->ops.solve           = MatSolve_SeqBAIJ_5;
1125         break;
1126     }
1127   }
1128   B->destroy          = MatDestroy_SeqBAIJ;
1129   B->view             = MatView_SeqBAIJ;
1130   B->factor           = 0;
1131   B->lupivotthreshold = 1.0;
1132   b->row              = 0;
1133   b->col              = 0;
1134   b->reallocs         = 0;
1135 
1136   b->m       = m; B->m = m; B->M = m;
1137   b->n       = n; B->n = n; B->N = n;
1138   b->mbs     = mbs;
1139   b->nbs     = nbs;
1140   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1141   if (nnz == PETSC_NULL) {
1142     if (nz == PETSC_DEFAULT) nz = 5;
1143     else if (nz <= 0)        nz = 1;
1144     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1145     nz = nz*mbs;
1146   }
1147   else {
1148     nz = 0;
1149     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1150   }
1151 
1152   /* allocate the matrix space */
1153   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1154   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1155   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1156   b->j  = (int *) (b->a + nz*bs2);
1157   PetscMemzero(b->j,nz*sizeof(int));
1158   b->i  = b->j + nz;
1159   b->singlemalloc = 1;
1160 
1161   b->i[0] = 0;
1162   for (i=1; i<mbs+1; i++) {
1163     b->i[i] = b->i[i-1] + b->imax[i-1];
1164   }
1165 
1166   /* b->ilen will count nonzeros in each block row so far. */
1167   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1168   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1169   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1170 
1171   b->bs               = bs;
1172   b->bs2              = bs2;
1173   b->mbs              = mbs;
1174   b->nz               = 0;
1175   b->maxnz            = nz;
1176   b->sorted           = 0;
1177   b->roworiented      = 1;
1178   b->nonew            = 0;
1179   b->diag             = 0;
1180   b->solve_work       = 0;
1181   b->mult_work        = 0;
1182   b->spptr            = 0;
1183   *A = B;
1184   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1185   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1186   return 0;
1187 }
1188 
1189 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1190 {
1191   Mat         C;
1192   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1193   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1194 
1195   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1196 
1197   *B = 0;
1198   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1199   PLogObjectCreate(C);
1200   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1201   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1202   C->destroy    = MatDestroy_SeqBAIJ;
1203   C->view       = MatView_SeqBAIJ;
1204   C->factor     = A->factor;
1205   c->row        = 0;
1206   c->col        = 0;
1207   C->assembled  = PETSC_TRUE;
1208 
1209   c->m = C->m   = a->m;
1210   c->n = C->n   = a->n;
1211   C->M          = a->m;
1212   C->N          = a->n;
1213 
1214   c->bs         = a->bs;
1215   c->bs2        = a->bs2;
1216   c->mbs        = a->mbs;
1217   c->nbs        = a->nbs;
1218 
1219   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1220   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1221   for ( i=0; i<mbs; i++ ) {
1222     c->imax[i] = a->imax[i];
1223     c->ilen[i] = a->ilen[i];
1224   }
1225 
1226   /* allocate the matrix space */
1227   c->singlemalloc = 1;
1228   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1229   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1230   c->j  = (int *) (c->a + nz*bs2);
1231   c->i  = c->j + nz;
1232   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1233   if (mbs > 0) {
1234     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1235     if (cpvalues == COPY_VALUES) {
1236       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1237     }
1238   }
1239 
1240   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1241   c->sorted      = a->sorted;
1242   c->roworiented = a->roworiented;
1243   c->nonew       = a->nonew;
1244 
1245   if (a->diag) {
1246     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1247     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1248     for ( i=0; i<mbs; i++ ) {
1249       c->diag[i] = a->diag[i];
1250     }
1251   }
1252   else c->diag          = 0;
1253   c->nz                 = a->nz;
1254   c->maxnz              = a->maxnz;
1255   c->solve_work         = 0;
1256   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1257   c->mult_work          = 0;
1258   *B = C;
1259   return 0;
1260 }
1261 
1262 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1263 {
1264   Mat_SeqBAIJ  *a;
1265   Mat          B;
1266   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1267   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1268   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1269   int          *masked, nmask,tmp,bs2,ishift;
1270   Scalar       *aa;
1271   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1272 
1273   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1274   bs2  = bs*bs;
1275 
1276   MPI_Comm_size(comm,&size);
1277   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1278   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1279   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1280   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1281   M = header[1]; N = header[2]; nz = header[3];
1282 
1283   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1284 
1285   /*
1286      This code adds extra rows to make sure the number of rows is
1287     divisible by the blocksize
1288   */
1289   mbs        = M/bs;
1290   extra_rows = bs - M + bs*(mbs);
1291   if (extra_rows == bs) extra_rows = 0;
1292   else                  mbs++;
1293   if (extra_rows) {
1294     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
1295   }
1296 
1297   /* read in row lengths */
1298   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1299   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1300   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1301 
1302   /* read in column indices */
1303   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1304   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1305   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1306 
1307   /* loop over row lengths determining block row lengths */
1308   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1309   PetscMemzero(browlengths,mbs*sizeof(int));
1310   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1311   PetscMemzero(mask,mbs*sizeof(int));
1312   masked = mask + mbs;
1313   rowcount = 0; nzcount = 0;
1314   for ( i=0; i<mbs; i++ ) {
1315     nmask = 0;
1316     for ( j=0; j<bs; j++ ) {
1317       kmax = rowlengths[rowcount];
1318       for ( k=0; k<kmax; k++ ) {
1319         tmp = jj[nzcount++]/bs;
1320         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1321       }
1322       rowcount++;
1323     }
1324     browlengths[i] += nmask;
1325     /* zero out the mask elements we set */
1326     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1327   }
1328 
1329   /* create our matrix */
1330   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1331          CHKERRQ(ierr);
1332   B = *A;
1333   a = (Mat_SeqBAIJ *) B->data;
1334 
1335   /* set matrix "i" values */
1336   a->i[0] = 0;
1337   for ( i=1; i<= mbs; i++ ) {
1338     a->i[i]      = a->i[i-1] + browlengths[i-1];
1339     a->ilen[i-1] = browlengths[i-1];
1340   }
1341   a->nz         = 0;
1342   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1343 
1344   /* read in nonzero values */
1345   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1346   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
1347   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1348 
1349   /* set "a" and "j" values into matrix */
1350   nzcount = 0; jcount = 0;
1351   for ( i=0; i<mbs; i++ ) {
1352     nzcountb = nzcount;
1353     nmask    = 0;
1354     for ( j=0; j<bs; j++ ) {
1355       kmax = rowlengths[i*bs+j];
1356       for ( k=0; k<kmax; k++ ) {
1357         tmp = jj[nzcount++]/bs;
1358 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1359       }
1360       rowcount++;
1361     }
1362     /* sort the masked values */
1363     PetscSortInt(nmask,masked);
1364 
1365     /* set "j" values into matrix */
1366     maskcount = 1;
1367     for ( j=0; j<nmask; j++ ) {
1368       a->j[jcount++]  = masked[j];
1369       mask[masked[j]] = maskcount++;
1370     }
1371     /* set "a" values into matrix */
1372     ishift = bs2*a->i[i];
1373     for ( j=0; j<bs; j++ ) {
1374       kmax = rowlengths[i*bs+j];
1375       for ( k=0; k<kmax; k++ ) {
1376         tmp    = jj[nzcountb]/bs ;
1377         block  = mask[tmp] - 1;
1378         point  = jj[nzcountb] - bs*tmp;
1379         idx    = ishift + bs2*block + j + bs*point;
1380         a->a[idx] = aa[nzcountb++];
1381       }
1382     }
1383     /* zero out the mask elements we set */
1384     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1385   }
1386   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1387 
1388   PetscFree(rowlengths);
1389   PetscFree(browlengths);
1390   PetscFree(aa);
1391   PetscFree(jj);
1392   PetscFree(mask);
1393 
1394   B->assembled = PETSC_TRUE;
1395 
1396   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1397   if (flg) {
1398     Viewer tviewer;
1399     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1400     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
1401     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1402     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1403   }
1404   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1405   if (flg) {
1406     Viewer tviewer;
1407     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1408     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
1409     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1410     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1411   }
1412   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1413   if (flg) {
1414     Viewer tviewer;
1415     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1416     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1417     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1418   }
1419   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1420   if (flg) {
1421     Viewer tviewer;
1422     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1423     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
1424     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1425     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1426   }
1427   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1428   if (flg) {
1429     Viewer tviewer;
1430     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
1431     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
1432     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
1433     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1434   }
1435   return 0;
1436 }
1437 
1438 
1439 
1440