xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 8d1cd36bbae09094f8343e481f39717c4ade5555)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.29 1996/04/06 00:00:43 balay Exp curfman $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = a->indexshift;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45     CHKERRQ(ierr);
46     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
47     PetscFree(ia); PetscFree(ja);
48   } else {
49     if (ishift == oshift) {
50       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51     }
52     else if (ishift == -1) {
53       /* temporarily subtract 1 from i and j indices */
54       int nz = a->i[a->n] - 1;
55       for ( i=0; i<nz; i++ ) a->j[i]--;
56       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58       for ( i=0; i<nz; i++ ) a->j[i]++;
59       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60     } else {
61       /* temporarily add 1 to i and j indices */
62       int nz = a->i[a->n] - 1;
63       for ( i=0; i<nz; i++ ) a->j[i]++;
64       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66       for ( i=0; i<nz; i++ ) a->j[i]--;
67       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68     }
69   }
70   return 0;
71 }
72 
73 /*
74      Adds diagonal pointers to sparse matrix structure.
75 */
76 
77 int MatMarkDiag_SeqBAIJ(Mat A)
78 {
79   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80   int         i,j, *diag, m = a->mbs;
81 
82   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83   PLogObjectMemory(A,(m+1)*sizeof(int));
84   for ( i=0; i<m; i++ ) {
85     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86       if (a->j[j] == i) {
87         diag[i] = j;
88         break;
89       }
90     }
91   }
92   a->diag = diag;
93   return 0;
94 }
95 
96 #include "draw.h"
97 #include "pinclude/pviewer.h"
98 #include "sys.h"
99 
100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
101 {
102   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
103   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=bs*bs;
104   Scalar      *aa;
105 
106   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
107   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
108   col_lens[0] = MAT_COOKIE;
109   col_lens[1] = a->m;
110   col_lens[2] = a->n;
111   col_lens[3] = a->nz*bs2;
112 
113   /* store lengths of each row and write (including header) to file */
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118     }
119   }
120   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
121   PetscFree(col_lens);
122 
123   /* store column indices (zero start index) */
124   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
125   count = 0;
126   for ( i=0; i<a->mbs; i++ ) {
127     for ( j=0; j<bs; j++ ) {
128       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129         for ( l=0; l<bs; l++ ) {
130           jj[count++] = bs*a->j[k] + l;
131         }
132       }
133     }
134   }
135   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
136   PetscFree(jj);
137 
138   /* store nonzero values */
139   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
140   count = 0;
141   for ( i=0; i<a->mbs; i++ ) {
142     for ( j=0; j<bs; j++ ) {
143       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144         for ( l=0; l<bs; l++ ) {
145           aa[count++] = a->a[bs2*k + l*bs + j];
146         }
147       }
148     }
149   }
150   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
151   PetscFree(aa);
152   return 0;
153 }
154 
155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
156 {
157   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
158   int         ierr, i,j,format,bs = a->bs,k,l,bs2=bs*bs;
159   FILE        *fd;
160   char        *outputname;
161 
162   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
163   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
164   ierr = ViewerGetFormat(viewer,&format);
165   if (format == ASCII_FORMAT_INFO) {
166     /* no need to print additional information */ ;
167   }
168   else if (format == ASCII_FORMAT_MATLAB) {
169     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
170   }
171   else if (format == ASCII_FORMAT_COMMON) {
172     for ( i=0; i<a->mbs; i++ ) {
173       for ( j=0; j<bs; j++ ) {
174         fprintf(fd,"row %d:",i*bs+j);
175         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
176           for ( l=0; l<bs; l++ ) {
177 #if defined(PETSC_COMPLEX)
178           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
179             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
180               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
181           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
182             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
183 #else
184           if (a->a[bs2*k + l*bs + j] != 0.0)
185             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
186 #endif
187           }
188         }
189         fprintf(fd,"\n");
190       }
191     }
192   }
193   else {
194     for ( i=0; i<a->mbs; i++ ) {
195       for ( j=0; j<bs; j++ ) {
196         fprintf(fd,"row %d:",i*bs+j);
197         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
198           for ( l=0; l<bs; l++ ) {
199 #if defined(PETSC_COMPLEX)
200           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
201             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
202               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
203           }
204           else {
205             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
206           }
207 #else
208             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
209 #endif
210           }
211         }
212         fprintf(fd,"\n");
213       }
214     }
215   }
216   fflush(fd);
217   return 0;
218 }
219 
220 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
221 {
222   Mat         A = (Mat) obj;
223   ViewerType  vtype;
224   int         ierr;
225 
226   if (!viewer) {
227     viewer = STDOUT_VIEWER_SELF;
228   }
229 
230   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
231   if (vtype == MATLAB_VIEWER) {
232     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
233   }
234   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
235     return MatView_SeqBAIJ_ASCII(A,viewer);
236   }
237   else if (vtype == BINARY_FILE_VIEWER) {
238     return MatView_SeqBAIJ_Binary(A,viewer);
239   }
240   else if (vtype == DRAW_VIEWER) {
241     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
242   }
243   return 0;
244 }
245 
246 #define CHUNKSIZE  1
247 
248 /* This version has row oriented v  */
249 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
250 {
251   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
252   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
253   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
254   int         *aj=a->j,nonew=a->nonew,shift=a->indexshift,bs=a->bs,brow,bcol;
255   int          ridx,cidx,bs2=bs*bs;
256   Scalar      *ap,value,*aa=a->a,*bap;
257 
258   for ( k=0; k<m; k++ ) { /* loop over added rows */
259     row  = im[k]; brow = row/bs;
260     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
261     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
262     rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
263     rmax = imax[brow]; nrow = ailen[brow];
264     low = 0;
265     for ( l=0; l<n; l++ ) { /* loop over added columns */
266       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
267       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
268       col = in[l] - shift; bcol = col/bs;
269       ridx = row % bs; cidx = col % bs;
270       if (roworiented) {
271         value = *v++;
272       }
273       else {
274         value = v[k + l*m];
275       }
276       if (!sorted) low = 0; high = nrow;
277       while (high-low > 5) {
278         t = (low+high)/2;
279         if (rp[t] > bcol) high = t;
280         else             low  = t;
281       }
282       for ( i=low; i<high; i++ ) {
283         if (rp[i] > bcol) break;
284         if (rp[i] == bcol) {
285           bap  = ap +  bs2*i + bs*cidx + ridx;
286           if (is == ADD_VALUES) *bap += value;
287           else                  *bap  = value;
288           goto noinsert;
289         }
290       }
291       if (nonew) goto noinsert;
292       if (nrow >= rmax) {
293         /* there is no extra room in row, therefore enlarge */
294         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
295         Scalar *new_a;
296 
297         /* malloc new storage space */
298         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
299         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
300         new_j   = (int *) (new_a + bs2*new_nz);
301         new_i   = new_j + new_nz;
302 
303         /* copy over old data into new slots */
304         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
305         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
306         PetscMemcpy(new_j,aj,(ai[brow]+nrow+shift)*sizeof(int));
307         len = (new_nz - CHUNKSIZE - ai[brow] - nrow - shift);
308         PetscMemcpy(new_j+ai[brow]+shift+nrow+CHUNKSIZE,aj+ai[brow]+shift+nrow,
309                                                            len*sizeof(int));
310         PetscMemcpy(new_a,aa,(ai[brow]+nrow+shift)*bs2*sizeof(Scalar));
311         PetscMemzero(new_a+bs2*(ai[brow]+shift+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
312         PetscMemcpy(new_a+bs2*(ai[brow]+shift+nrow+CHUNKSIZE),
313                     aa+bs2*(ai[brow]+shift+nrow),bs2*len*sizeof(Scalar));
314         /* free up old matrix storage */
315         PetscFree(a->a);
316         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
317         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
318         a->singlemalloc = 1;
319 
320         rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
321         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
322         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
323         a->maxnz += bs2*CHUNKSIZE;
324         a->reallocs++;
325         a->nz+=bs2;
326       }
327       N = nrow++ - 1;
328       /* shift up all the later entries in this row */
329       for ( ii=N; ii>=i; ii-- ) {
330         rp[ii+1] = rp[ii];
331          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
332       }
333       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
334       rp[i] = bcol;
335       ap[bs2*i + bs*cidx + ridx] = value;
336       noinsert:;
337       low = i;
338     }
339     ailen[brow] = nrow;
340   }
341   return 0;
342 }
343 
344 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
345 {
346   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
347   *m = a->m; *n = a->n;
348   return 0;
349 }
350 
351 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
352 {
353   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
354   *m = 0; *n = a->m;
355   return 0;
356 }
357 
358 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
359 {
360   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
361   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
362   Scalar      *aa,*v_i,*aa_i;
363 
364   bs  = a->bs;
365   ai  = a->i;
366   aj  = a->j;
367   aa  = a->a;
368   bs2 = bs*bs;
369 
370   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
371 
372   bn  = row/bs;   /* Block number */
373   bp  = row % bs; /* Block Position */
374   M   = ai[bn+1] - ai[bn];
375   *nz = bs*M;
376 
377   if (v) {
378     *v = 0;
379     if (*nz) {
380       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
381       for ( i=0; i<M; i++ ) { /* for each block in the block row */
382         v_i  = *v + i*bs;
383         aa_i = aa + bs2*(ai[bn] + i);
384         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
385       }
386     }
387   }
388 
389   if (idx) {
390     *idx = 0;
391     if (*nz) {
392       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
393       for ( i=0; i<M; i++ ) { /* for each block in the block row */
394         idx_i = *idx + i*bs;
395         itmp  = bs*aj[ai[bn] + i];
396         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
397       }
398     }
399   }
400   return 0;
401 }
402 
403 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
404 {
405   if (idx) {if (*idx) PetscFree(*idx);}
406   if (v)   {if (*v)   PetscFree(*v);}
407   return 0;
408 }
409 
410 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
411 {
412   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
413   Mat         C;
414   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
415   int         shift=a->indexshift,*rows,*cols,bs2=bs*bs;
416   Scalar      *array=a->a;
417 
418   if (B==PETSC_NULL && mbs!=nbs)
419     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
420   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
421   PetscMemzero(col,(1+nbs)*sizeof(int));
422   if (shift) {
423     for ( i=0; i<ai[mbs]-1; i++ ) aj[i] -= 1;
424   }
425   for ( i=0; i<ai[mbs]+shift; i++ ) col[aj[i]] += 1;
426   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
427   PetscFree(col);
428   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
429   cols = rows + bs;
430   for ( i=0; i<mbs; i++ ) {
431     cols[0] = i*bs;
432     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
433     len = ai[i+1] - ai[i];
434     for ( j=0; j<len; j++ ) {
435       rows[0] = (*aj++)*bs;
436       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
437       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
438       array += bs2;
439     }
440   }
441   PetscFree(rows);
442   if (shift) {
443     for ( i=0; i<ai[mbs]-1; i++ ) aj[i] += 1;
444   }
445 
446   ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
447   ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
448 
449   if (B != PETSC_NULL) {
450     *B = C;
451   } else {
452     /* This isn't really an in-place transpose */
453     PetscFree(a->a);
454     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
455     if (a->diag) PetscFree(a->diag);
456     if (a->ilen) PetscFree(a->ilen);
457     if (a->imax) PetscFree(a->imax);
458     if (a->solve_work) PetscFree(a->solve_work);
459     PetscFree(a);
460     PetscMemcpy(A,C,sizeof(struct _Mat));
461     PetscHeaderDestroy(C);
462   }
463   return 0;
464 }
465 
466 
467 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
468 {
469   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
470   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
471   int        m = a->m, *ip, N, *ailen = a->ilen,shift = a->indexshift;
472   int        bs = a->bs,  mbs = a->mbs, bs2 = bs*bs;
473   Scalar     *aa = a->a, *ap;
474 
475   if (mode == FLUSH_ASSEMBLY) return 0;
476 
477   for ( i=1; i<mbs; i++ ) {
478     /* move each row back by the amount of empty slots (fshift) before it*/
479     fshift += imax[i-1] - ailen[i-1];
480     if (fshift) {
481       ip = aj + ai[i] + shift; ap = aa + bs2*ai[i] + shift;
482       N = ailen[i];
483       for ( j=0; j<N; j++ ) {
484         ip[j-fshift] = ip[j];
485         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
486       }
487     }
488     ai[i] = ai[i-1] + ailen[i-1];
489   }
490   if (mbs) {
491     fshift += imax[mbs-1] - ailen[mbs-1];
492     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
493   }
494   /* reset ilen and imax for each row */
495   for ( i=0; i<mbs; i++ ) {
496     ailen[i] = imax[i] = ai[i+1] - ai[i];
497   }
498   a->nz = (ai[m] + shift)*bs2;
499 
500   /* diagonals may have moved, so kill the diagonal pointers */
501   if (fshift && a->diag) {
502     PetscFree(a->diag);
503     PLogObjectMemory(A,-(m+1)*sizeof(int));
504     a->diag = 0;
505   }
506   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneeded storage space %d used %d rows %d\n",
507            fshift,a->nz,m);
508   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n",
509            a->reallocs);
510   return 0;
511 }
512 
513 static int MatZeroEntries_SeqBAIJ(Mat A)
514 {
515   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
516   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
517   return 0;
518 }
519 
520 int MatDestroy_SeqBAIJ(PetscObject obj)
521 {
522   Mat         A  = (Mat) obj;
523   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
524 
525 #if defined(PETSC_LOG)
526   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
527 #endif
528   PetscFree(a->a);
529   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
530   if (a->diag) PetscFree(a->diag);
531   if (a->ilen) PetscFree(a->ilen);
532   if (a->imax) PetscFree(a->imax);
533   if (a->solve_work) PetscFree(a->solve_work);
534   if (a->mult_work) PetscFree(a->mult_work);
535   PetscFree(a);
536   PLogObjectDestroy(A);
537   PetscHeaderDestroy(A);
538   return 0;
539 }
540 
541 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
542 {
543   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
544   if      (op == ROW_ORIENTED)              a->roworiented = 1;
545   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
546   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
547   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
548   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
549   else if (op == ROWS_SORTED ||
550            op == SYMMETRIC_MATRIX ||
551            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
552            op == YES_NEW_DIAGONALS)
553     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
554   else if (op == NO_NEW_DIAGONALS)
555     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
556   else
557     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
558   return 0;
559 }
560 
561 
562 /* -------------------------------------------------------*/
563 /* Should check that shapes of vectors and matrices match */
564 /* -------------------------------------------------------*/
565 #include "pinclude/plapack.h"
566 
567 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
568 {
569   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
570   Scalar          *xg,*yg;
571   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
572   register Scalar x1,x2,x3,x4,x5;
573   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
574   int             bs = a->bs,j,n,bs2 = bs*bs;
575 
576   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
577   PetscMemzero(y,m*sizeof(Scalar));
578   x     = x;
579   idx   = a->j;
580   v     = a->a;
581   ii    = a->i;
582 
583   switch (bs) {
584     case 1:
585      for ( i=0; i<m; i++ ) {
586        n    = ii[1] - ii[0]; ii++;
587        sum  = 0.0;
588        while (n--) sum += *v++ * x[*idx++];
589        y[i] = sum;
590       }
591       break;
592     case 2:
593       for ( i=0; i<mbs; i++ ) {
594         n  = ii[1] - ii[0]; ii++;
595         sum1 = 0.0; sum2 = 0.0;
596         for ( j=0; j<n; j++ ) {
597           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
598           sum1 += v[0]*x1 + v[2]*x2;
599           sum2 += v[1]*x1 + v[3]*x2;
600           v += 4;
601         }
602         y[0] += sum1; y[1] += sum2;
603         y += 2;
604       }
605       break;
606     case 3:
607       for ( i=0; i<mbs; i++ ) {
608         n  = ii[1] - ii[0]; ii++;
609         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
610         for ( j=0; j<n; j++ ) {
611           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
612           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
613           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
614           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
615           v += 9;
616         }
617         y[0] += sum1; y[1] += sum2; y[2] += sum3;
618         y += 3;
619       }
620       break;
621     case 4:
622       for ( i=0; i<mbs; i++ ) {
623         n  = ii[1] - ii[0]; ii++;
624         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
625         for ( j=0; j<n; j++ ) {
626           xb = x + 4*(*idx++);
627           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
628           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
629           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
630           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
631           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
632           v += 16;
633         }
634         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
635         y += 4;
636       }
637       break;
638     case 5:
639       for ( i=0; i<mbs; i++ ) {
640         n  = ii[1] - ii[0]; ii++;
641         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
642         for ( j=0; j<n; j++ ) {
643           xb = x + 5*(*idx++);
644           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
645           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
646           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
647           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
648           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
649           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
650           v += 25;
651         }
652         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
653         y += 5;
654       }
655       break;
656       /* block sizes larger then 5 by 5 are handled by BLAS */
657     default: {
658       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
659       if (!a->mult_work) {
660         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
661         CHKPTRQ(a->mult_work);
662       }
663       work = a->mult_work;
664       for ( i=0; i<mbs; i++ ) {
665         n     = ii[1] - ii[0]; ii++;
666         ncols = n*bs;
667         workt = work;
668         for ( j=0; j<n; j++ ) {
669           xb = x + bs*(*idx++);
670           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
671           workt += bs;
672         }
673         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
674         v += n*bs2;
675         y += bs;
676       }
677     }
678   }
679   PLogFlops(2*bs2*a->nz - m);
680   return 0;
681 }
682 
683 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
684 {
685   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
686   if (nz)  *nz  = a->bs*a->bs*a->nz;
687   if (nza) *nza = a->maxnz;
688   if (mem) *mem = (int)A->mem;
689   return 0;
690 }
691 
692 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
693 {
694   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
695 
696   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
697 
698   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
699   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
700       (a->nz != b->nz)||(a->indexshift != b->indexshift)) {
701     *flg = PETSC_FALSE; return 0;
702   }
703 
704   /* if the a->i are the same */
705   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
706     *flg = PETSC_FALSE; return 0;
707   }
708 
709   /* if a->j are the same */
710   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
711     *flg = PETSC_FALSE; return 0;
712   }
713 
714   /* if a->a are the same */
715   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
716     *flg = PETSC_FALSE; return 0;
717   }
718   *flg = PETSC_TRUE;
719   return 0;
720 
721 }
722 
723 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
724 {
725   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
726   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
727   Scalar      *x, zero = 0.0,*aa,*aa_j;
728 
729   bs   = a->bs;
730   aa   = a->a;
731   ai   = a->i;
732   aj   = a->j;
733   ambs = a->mbs;
734   bs2  = bs*bs;
735 
736   VecSet(&zero,v);
737   VecGetArray(v,&x); VecGetLocalSize(v,&n);
738   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
739   for ( i=0; i<ambs; i++ ) {
740     for ( j=ai[i]; j<ai[i+1]; j++ ) {
741       if (aj[j] == i) {
742         row  = i*bs;
743         aa_j = aa+j*bs2;
744         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
745         break;
746       }
747     }
748   }
749   return 0;
750 }
751 
752 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
753 {
754   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
755   Scalar      *l,*r,x,*v,*aa,*li,*ri;
756   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
757 
758   ai  = a->i;
759   aj  = a->j;
760   aa  = a->a;
761   m   = a->m;
762   n   = a->n;
763   bs  = a->bs;
764   mbs = a->mbs;
765   bs2 = bs*bs;
766   if (ll) {
767     VecGetArray(ll,&l); VecGetSize(ll,&lm);
768     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
769     for ( i=0; i<mbs; i++ ) { /* for each block row */
770       M  = ai[i+1] - ai[i];
771       li = l + i*bs;
772       v  = aa + bs2*ai[i];
773       for ( j=0; j<M; j++ ) { /* for each block */
774         for ( k=0; k<bs2; k++ ) {
775           (*v++) *= li[k%bs];
776         }
777       }
778     }
779   }
780 
781   if (rr) {
782     VecGetArray(rr,&r); VecGetSize(rr,&rn);
783     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
784     for ( i=0; i<mbs; i++ ) { /* for each block row */
785       M  = ai[i+1] - ai[i];
786       v  = aa + bs2*ai[i];
787       for ( j=0; j<M; j++ ) { /* for each block */
788         ri = r + bs*aj[ai[i]+j];
789         for ( k=0; k<bs; k++ ) {
790           x = ri[k];
791           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
792         }
793       }
794     }
795   }
796   return 0;
797 }
798 
799 
800 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
801 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
802 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
803 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
804 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
805 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
806 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
807 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
808 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
809 
810 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
811 {
812   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
813   Scalar      *v = a->a;
814   double      sum = 0.0;
815   int         i;
816 
817   if (type == NORM_FROBENIUS) {
818     for (i=0; i<a->nz; i++ ) {
819 #if defined(PETSC_COMPLEX)
820       sum += real(conj(*v)*(*v)); v++;
821 #else
822       sum += (*v)*(*v); v++;
823 #endif
824     }
825     *norm = sqrt(sum);
826   }
827   else {
828     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
829   }
830   return 0;
831 }
832 
833 /*
834      note: This can only work for identity for row and col. It would
835    be good to check this and otherwise generate an error.
836 */
837 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
838 {
839   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
840   Mat         outA;
841   int         ierr;
842 
843   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
844 
845   outA          = inA;
846   inA->factor   = FACTOR_LU;
847   a->row        = row;
848   a->col        = col;
849 
850   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
851 
852   if (!a->diag) {
853     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
854   }
855   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
856   return 0;
857 }
858 
859 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
860 {
861   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
862   int         one = 1, totalnz = a->bs*a->bs*a->nz;
863   BLscal_( &totalnz, alpha, a->a, &one );
864   PLogFlops(totalnz);
865   return 0;
866 }
867 
868 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
869 {
870   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
871   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
872   int        *ai = a->i, *ailen = a->ilen, shift = a->indexshift;
873   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=bs*bs;
874   Scalar     *ap, *aa = a->a, zero = 0.0;
875 
876   for ( k=0; k<m; k++ ) { /* loop over rows */
877     row  = im[k]; brow = row/bs;
878     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
879     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
880     rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
881     nrow = ailen[brow];
882     for ( l=0; l<n; l++ ) { /* loop over columns */
883       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
884       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
885       col  = in[l] - shift;
886       bcol = col/bs;
887       cidx = col%bs;
888       ridx = row%bs;
889       high = nrow;
890       low  = 0; /* assume unsorted */
891       while (high-low > 5) {
892         t = (low+high)/2;
893         if (rp[t] > bcol) high = t;
894         else             low  = t;
895       }
896       for ( i=low; i<high; i++ ) {
897         if (rp[i] > bcol) break;
898         if (rp[i] == bcol) {
899           *v++ = ap[bs2*i+bs*cidx+ridx];
900           goto finished;
901         }
902       }
903       *v++ = zero;
904       finished:;
905     }
906   }
907   return 0;
908 }
909 
910 int MatPrintHelp_SeqBAIJ(Mat A)
911 {
912   static int called = 0;
913 
914   if (called) return 0; else called = 1;
915   return 0;
916 }
917 /* -------------------------------------------------------------------*/
918 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
919        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
920        MatMult_SeqBAIJ,0,
921        0,0,
922        MatSolve_SeqBAIJ,0,
923        0,0,
924        MatLUFactor_SeqBAIJ,0,
925        0,
926        MatTranspose_SeqBAIJ,
927        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
928        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
929        0,MatAssemblyEnd_SeqBAIJ,
930        0,
931        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
932        MatGetReordering_SeqBAIJ,
933        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
934        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
935        MatILUFactorSymbolic_SeqBAIJ,0,
936        0,0,/*  MatConvert_SeqBAIJ  */ 0,
937        0,0,
938        MatConvertSameType_SeqBAIJ,0,0,
939        MatILUFactor_SeqBAIJ,0,0,
940        0,0,
941        MatGetValues_SeqBAIJ,0,
942        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
943        0};
944 
945 /*@C
946    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
947    compressed row) format.  For good matrix assembly performance the
948    user should preallocate the matrix storage by setting the parameter nz
949    (or nzz).  By setting these parameters accurately, performance can be
950    increased by more than a factor of 50.
951 
952    Input Parameters:
953 .  comm - MPI communicator, set to MPI_COMM_SELF
954 .  bs - size of block
955 .  m - number of rows
956 .  n - number of columns
957 .  nz - number of block nonzeros per block row (same for all rows)
958 .  nzz - number of block nonzeros per block row or PETSC_NULL
959          (possibly different for each row)
960 
961    Output Parameter:
962 .  A - the matrix
963 
964    Notes:
965    The block AIJ format is fully compatible with standard Fortran 77
966    storage.  That is, the stored row and column indices can begin at
967    either one (as in Fortran) or zero.  See the users' manual for details.
968 
969    Specify the preallocated storage with either nz or nnz (not both).
970    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
971    allocation.  For additional details, see the users manual chapter on
972    matrices and the file $(PETSC_DIR)/Performance.
973 
974 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
975 @*/
976 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
977 {
978   Mat         B;
979   Mat_SeqBAIJ *b;
980   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
981 
982   if (mbs*bs!=m || nbs*bs!=n)
983     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
984 
985   *A = 0;
986   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
987   PLogObjectCreate(B);
988   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
989   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
990   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
991   switch (bs) {
992     case 1:
993       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
994       break;
995     case 2:
996       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
997       break;
998     case 3:
999       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1000       break;
1001     case 4:
1002       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1003       break;
1004     case 5:
1005       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1006       break;
1007   }
1008   B->destroy          = MatDestroy_SeqBAIJ;
1009   B->view             = MatView_SeqBAIJ;
1010   B->factor           = 0;
1011   B->lupivotthreshold = 1.0;
1012   b->row              = 0;
1013   b->col              = 0;
1014   b->reallocs         = 0;
1015 
1016   b->m       = m; B->m = m; B->M = m;
1017   b->n       = n; B->n = n; B->N = n;
1018   b->mbs     = mbs;
1019   b->nbs     = nbs;
1020   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1021   if (nnz == PETSC_NULL) {
1022     if (nz == PETSC_DEFAULT) nz = CHUNKSIZE;
1023     else if (nz <= 0)        nz = 1;
1024     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1025     nz = nz*mbs;
1026   }
1027   else {
1028     nz = 0;
1029     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1030   }
1031 
1032   /* allocate the matrix space */
1033   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1034   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1035   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1036   b->j  = (int *) (b->a + nz*bs2);
1037   PetscMemzero(b->j,nz*sizeof(int));
1038   b->i  = b->j + nz;
1039   b->singlemalloc = 1;
1040 
1041   b->i[0] = 0;
1042   for (i=1; i<mbs+1; i++) {
1043     b->i[i] = b->i[i-1] + b->imax[i-1];
1044   }
1045 
1046   /* b->ilen will count nonzeros in each block row so far. */
1047   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1048   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1049   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1050 
1051   b->bs               = bs;
1052   b->mbs              = mbs;
1053   b->nz               = 0;
1054   b->maxnz            = nz;
1055   b->sorted           = 0;
1056   b->roworiented      = 1;
1057   b->nonew            = 0;
1058   b->diag             = 0;
1059   b->solve_work       = 0;
1060   b->mult_work        = 0;
1061   b->spptr            = 0;
1062   b->indexshift       = 0;
1063   *A = B;
1064   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1065   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1066   return 0;
1067 }
1068 
1069 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1070 {
1071   Mat         C;
1072   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1073   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz,bs2 = bs*bs;
1074 
1075   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1076 
1077   *B = 0;
1078   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1079   PLogObjectCreate(C);
1080   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1081   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1082   C->destroy    = MatDestroy_SeqBAIJ;
1083   C->view       = MatView_SeqBAIJ;
1084   C->factor     = A->factor;
1085   c->row        = 0;
1086   c->col        = 0;
1087   C->assembled  = PETSC_TRUE;
1088 
1089   c->m = C->m   = a->m;
1090   c->n = C->n   = a->n;
1091   C->M          = a->m;
1092   C->N          = a->n;
1093 
1094   c->bs         = a->bs;
1095   c->mbs        = a->mbs;
1096   c->nbs        = a->nbs;
1097   c->indexshift = 0;
1098 
1099   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1100   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1101   for ( i=0; i<mbs; i++ ) {
1102     c->imax[i] = a->imax[i];
1103     c->ilen[i] = a->ilen[i];
1104   }
1105 
1106   /* allocate the matrix space */
1107   c->singlemalloc = 1;
1108   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1109   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1110   c->j  = (int *) (c->a + nz*bs2);
1111   c->i  = c->j + nz;
1112   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1113   if (mbs > 0) {
1114     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1115     if (cpvalues == COPY_VALUES) {
1116       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1117     }
1118   }
1119 
1120   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1121   c->sorted      = a->sorted;
1122   c->roworiented = a->roworiented;
1123   c->nonew       = a->nonew;
1124 
1125   if (a->diag) {
1126     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1127     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1128     for ( i=0; i<mbs; i++ ) {
1129       c->diag[i] = a->diag[i];
1130     }
1131   }
1132   else c->diag          = 0;
1133   c->nz                 = a->nz;
1134   c->maxnz              = a->maxnz;
1135   c->solve_work         = 0;
1136   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1137   c->mult_work          = 0;
1138   *B = C;
1139   return 0;
1140 }
1141 
1142 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1143 {
1144   Mat_SeqBAIJ  *a;
1145   Mat          B;
1146   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1147   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1148   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1149   int          *masked, nmask,tmp,ishift,bs2;
1150   Scalar       *aa;
1151   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1152 
1153   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1154   bs2  = bs*bs;
1155 
1156   MPI_Comm_size(comm,&size);
1157   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1158   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1159   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1160   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1161   M = header[1]; N = header[2]; nz = header[3];
1162 
1163   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1164 
1165   /*
1166      This code adds extra rows to make sure the number of rows is
1167     divisible by the blocksize
1168   */
1169   mbs        = M/bs;
1170   extra_rows = bs - M + bs*(mbs);
1171   if (extra_rows == bs) extra_rows = 0;
1172   else                  mbs++;
1173   if (extra_rows) {
1174     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
1175   }
1176 
1177   /* read in row lengths */
1178   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1179   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1180   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1181 
1182   /* read in column indices */
1183   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1184   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1185   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1186 
1187   /* loop over row lengths determining block row lengths */
1188   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1189   PetscMemzero(browlengths,mbs*sizeof(int));
1190   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1191   PetscMemzero(mask,mbs*sizeof(int));
1192   masked = mask + mbs;
1193   rowcount = 0; nzcount = 0;
1194   for ( i=0; i<mbs; i++ ) {
1195     nmask = 0;
1196     for ( j=0; j<bs; j++ ) {
1197       kmax = rowlengths[rowcount];
1198       for ( k=0; k<kmax; k++ ) {
1199         tmp = jj[nzcount++]/bs;
1200         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1201       }
1202       rowcount++;
1203     }
1204     browlengths[i] += nmask;
1205     /* zero out the mask elements we set */
1206     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1207   }
1208 
1209   /* create our matrix */
1210   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1211          CHKERRQ(ierr);
1212   B = *A;
1213   a = (Mat_SeqBAIJ *) B->data;
1214 
1215   /* set matrix "i" values */
1216   a->i[0] = 0;
1217   for ( i=1; i<= mbs; i++ ) {
1218     a->i[i]      = a->i[i-1] + browlengths[i-1];
1219     a->ilen[i-1] = browlengths[i-1];
1220   }
1221   a->indexshift = 0;
1222   a->nz         = 0;
1223   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1224 
1225   /* read in nonzero values */
1226   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1227   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
1228   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1229 
1230   /* set "a" and "j" values into matrix */
1231   nzcount = 0; jcount = 0;
1232   for ( i=0; i<mbs; i++ ) {
1233     nzcountb = nzcount;
1234     nmask    = 0;
1235     for ( j=0; j<bs; j++ ) {
1236       kmax = rowlengths[i*bs+j];
1237       for ( k=0; k<kmax; k++ ) {
1238         tmp = jj[nzcount++]/bs;
1239 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1240       }
1241       rowcount++;
1242     }
1243     /* sort the masked values */
1244     PetscSortInt(nmask,masked);
1245 
1246     /* set "j" values into matrix */
1247     maskcount = 1;
1248     for ( j=0; j<nmask; j++ ) {
1249       a->j[jcount++]  = masked[j];
1250       mask[masked[j]] = maskcount++;
1251     }
1252     /* set "a" values into matrix */
1253     ishift = bs2*a->i[i];
1254     for ( j=0; j<bs; j++ ) {
1255       kmax = rowlengths[i*bs+j];
1256       for ( k=0; k<kmax; k++ ) {
1257         tmp    = jj[nzcountb]/bs ;
1258         block  = mask[tmp] - 1;
1259         point  = jj[nzcountb] - bs*tmp;
1260         idx    = ishift + bs2*block + j + bs*point;
1261         a->a[idx] = aa[nzcountb++];
1262       }
1263     }
1264     /* zero out the mask elements we set */
1265     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1266   }
1267   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1268 
1269   PetscFree(rowlengths);
1270   PetscFree(browlengths);
1271   PetscFree(aa);
1272   PetscFree(jj);
1273   PetscFree(mask);
1274 
1275   B->assembled = PETSC_TRUE;
1276 
1277   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1278   if (flg) {
1279     Viewer tviewer;
1280     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1281     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
1282     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1283     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1284   }
1285   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1286   if (flg) {
1287     Viewer tviewer;
1288     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1289     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
1290     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1291     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1292   }
1293   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1294   if (flg) {
1295     Viewer tviewer;
1296     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1297     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1298     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1299   }
1300   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1301   if (flg) {
1302     Viewer tviewer;
1303     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1304     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
1305     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1306     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1307   }
1308   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1309   if (flg) {
1310     Viewer tviewer;
1311     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
1312     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
1313     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
1314     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1315   }
1316   return 0;
1317 }
1318 
1319 
1320 
1321