xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 13a67aface5a148d688de3dc83f9ec17fbd665a2)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.28 1996/04/05 21:52:51 balay Exp balay $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = a->indexshift;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45     CHKERRQ(ierr);
46     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
47     PetscFree(ia); PetscFree(ja);
48   } else {
49     if (ishift == oshift) {
50       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51     }
52     else if (ishift == -1) {
53       /* temporarily subtract 1 from i and j indices */
54       int nz = a->i[a->n] - 1;
55       for ( i=0; i<nz; i++ ) a->j[i]--;
56       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58       for ( i=0; i<nz; i++ ) a->j[i]++;
59       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60     } else {
61       /* temporarily add 1 to i and j indices */
62       int nz = a->i[a->n] - 1;
63       for ( i=0; i<nz; i++ ) a->j[i]++;
64       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66       for ( i=0; i<nz; i++ ) a->j[i]--;
67       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68     }
69   }
70   return 0;
71 }
72 
73 /*
74      Adds diagonal pointers to sparse matrix structure.
75 */
76 
77 int MatMarkDiag_SeqBAIJ(Mat A)
78 {
79   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80   int         i,j, *diag, m = a->mbs;
81 
82   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83   PLogObjectMemory(A,(m+1)*sizeof(int));
84   for ( i=0; i<m; i++ ) {
85     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86       if (a->j[j] == i) {
87         diag[i] = j;
88         break;
89       }
90     }
91   }
92   a->diag = diag;
93   return 0;
94 }
95 
96 #include "draw.h"
97 #include "pinclude/pviewer.h"
98 #include "sys.h"
99 
100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
101 {
102   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
103   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=bs*bs;
104   Scalar      *aa;
105 
106   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
107   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
108   col_lens[0] = MAT_COOKIE;
109   col_lens[1] = a->m;
110   col_lens[2] = a->n;
111   col_lens[3] = a->nz*bs2;
112 
113   /* store lengths of each row and write (including header) to file */
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118     }
119   }
120   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
121   PetscFree(col_lens);
122 
123   /* store column indices (zero start index) */
124   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
125   count = 0;
126   for ( i=0; i<a->mbs; i++ ) {
127     for ( j=0; j<bs; j++ ) {
128       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129         for ( l=0; l<bs; l++ ) {
130           jj[count++] = bs*a->j[k] + l;
131         }
132       }
133     }
134   }
135   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
136   PetscFree(jj);
137 
138   /* store nonzero values */
139   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
140   count = 0;
141   for ( i=0; i<a->mbs; i++ ) {
142     for ( j=0; j<bs; j++ ) {
143       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144         for ( l=0; l<bs; l++ ) {
145           aa[count++] = a->a[bs2*k + l*bs + j];
146         }
147       }
148     }
149   }
150   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
151   PetscFree(aa);
152   return 0;
153 }
154 
155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
156 {
157   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
158   int         ierr, i,j,format,bs = a->bs,k,l,bs2=bs*bs;
159   FILE        *fd;
160   char        *outputname;
161 
162   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
163   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
164   ierr = ViewerGetFormat(viewer,&format);
165   if (format == ASCII_FORMAT_INFO) {
166     /* no need to print additional information */ ;
167   }
168   else if (format == ASCII_FORMAT_MATLAB) {
169     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
170   }
171   else {
172     for ( i=0; i<a->mbs; i++ ) {
173       for ( j=0; j<bs; j++ ) {
174         fprintf(fd,"row %d:",i*bs+j);
175         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
176           for ( l=0; l<bs; l++ ) {
177 #if defined(PETSC_COMPLEX)
178           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
179             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
180               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
181           }
182           else {
183             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
184           }
185 #else
186             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
187 #endif
188           }
189         }
190         fprintf(fd,"\n");
191       }
192     }
193   }
194   fflush(fd);
195   return 0;
196 }
197 
198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
199 {
200   Mat         A = (Mat) obj;
201   ViewerType  vtype;
202   int         ierr;
203 
204   if (!viewer) {
205     viewer = STDOUT_VIEWER_SELF;
206   }
207 
208   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
209   if (vtype == MATLAB_VIEWER) {
210     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
211   }
212   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
213     return MatView_SeqBAIJ_ASCII(A,viewer);
214   }
215   else if (vtype == BINARY_FILE_VIEWER) {
216     return MatView_SeqBAIJ_Binary(A,viewer);
217   }
218   else if (vtype == DRAW_VIEWER) {
219     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
220   }
221   return 0;
222 }
223 
224 #define CHUNKSIZE  1
225 
226 /* This version has row oriented v  */
227 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
228 {
229   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
230   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
231   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
232   int         *aj=a->j,nonew=a->nonew,shift=a->indexshift,bs=a->bs,brow,bcol;
233   int          ridx,cidx,bs2=bs*bs;
234   Scalar      *ap,value,*aa=a->a,*bap;
235 
236   for ( k=0; k<m; k++ ) { /* loop over added rows */
237     row  = im[k]; brow = row/bs;
238     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
239     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
240     rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
241     rmax = imax[brow]; nrow = ailen[brow];
242     low = 0;
243     for ( l=0; l<n; l++ ) { /* loop over added columns */
244       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
245       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
246       col = in[l] - shift; bcol = col/bs;
247       ridx = row % bs; cidx = col % bs;
248       if (roworiented) {
249         value = *v++;
250       }
251       else {
252         value = v[k + l*m];
253       }
254       if (!sorted) low = 0; high = nrow;
255       while (high-low > 5) {
256         t = (low+high)/2;
257         if (rp[t] > bcol) high = t;
258         else             low  = t;
259       }
260       for ( i=low; i<high; i++ ) {
261         if (rp[i] > bcol) break;
262         if (rp[i] == bcol) {
263           bap  = ap +  bs2*i + bs*cidx + ridx;
264           if (is == ADD_VALUES) *bap += value;
265           else                  *bap  = value;
266           goto noinsert;
267         }
268       }
269       if (nonew) goto noinsert;
270       if (nrow >= rmax) {
271         /* there is no extra room in row, therefore enlarge */
272         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
273         Scalar *new_a;
274 
275         /* malloc new storage space */
276         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
277         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
278         new_j   = (int *) (new_a + bs2*new_nz);
279         new_i   = new_j + new_nz;
280 
281         /* copy over old data into new slots */
282         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
283         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
284         PetscMemcpy(new_j,aj,(ai[brow]+nrow+shift)*sizeof(int));
285         len = (new_nz - CHUNKSIZE - ai[brow] - nrow - shift);
286         PetscMemcpy(new_j+ai[brow]+shift+nrow+CHUNKSIZE,aj+ai[brow]+shift+nrow,
287                                                            len*sizeof(int));
288         PetscMemcpy(new_a,aa,(ai[brow]+nrow+shift)*bs2*sizeof(Scalar));
289         PetscMemzero(new_a+bs2*(ai[brow]+shift+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
290         PetscMemcpy(new_a+bs2*(ai[brow]+shift+nrow+CHUNKSIZE),
291                     aa+bs2*(ai[brow]+shift+nrow),bs2*len*sizeof(Scalar));
292         /* free up old matrix storage */
293         PetscFree(a->a);
294         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
295         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
296         a->singlemalloc = 1;
297 
298         rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
299         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
300         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
301         a->maxnz += bs2*CHUNKSIZE;
302         a->reallocs++;
303         a->nz+=bs2;
304       }
305       N = nrow++ - 1;
306       /* shift up all the later entries in this row */
307       for ( ii=N; ii>=i; ii-- ) {
308         rp[ii+1] = rp[ii];
309          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
310       }
311       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
312       rp[i] = bcol;
313       ap[bs2*i + bs*cidx + ridx] = value;
314       noinsert:;
315       low = i;
316     }
317     ailen[brow] = nrow;
318   }
319   return 0;
320 }
321 
322 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
323 {
324   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
325   *m = a->m; *n = a->n;
326   return 0;
327 }
328 
329 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
330 {
331   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
332   *m = 0; *n = a->m;
333   return 0;
334 }
335 
336 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
337 {
338   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
339   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
340   Scalar      *aa,*v_i,*aa_i;
341 
342   bs  = a->bs;
343   ai  = a->i;
344   aj  = a->j;
345   aa  = a->a;
346   bs2 = bs*bs;
347 
348   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
349 
350   bn  = row/bs;   /* Block number */
351   bp  = row % bs; /* Block Position */
352   M   = ai[bn+1] - ai[bn];
353   *nz = bs*M;
354 
355   if (v) {
356     *v = 0;
357     if (*nz) {
358       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
359       for ( i=0; i<M; i++ ) { /* for each block in the block row */
360         v_i  = *v + i*bs;
361         aa_i = aa + bs2*(ai[bn] + i);
362         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
363       }
364     }
365   }
366 
367   if (idx) {
368     *idx = 0;
369     if (*nz) {
370       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
371       for ( i=0; i<M; i++ ) { /* for each block in the block row */
372         idx_i = *idx + i*bs;
373         itmp  = bs*aj[ai[bn] + i];
374         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
375       }
376     }
377   }
378   return 0;
379 }
380 
381 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
382 {
383   if (idx) {if (*idx) PetscFree(*idx);}
384   if (v)   {if (*v)   PetscFree(*v);}
385   return 0;
386 }
387 
388 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
389 {
390   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
391   Mat         C;
392   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
393   int         shift=a->indexshift,*rows,*cols,bs2=bs*bs;
394   Scalar      *array=a->a;
395 
396   if (B==PETSC_NULL && mbs!=nbs)
397     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
398   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
399   PetscMemzero(col,(1+nbs)*sizeof(int));
400   if (shift) {
401     for ( i=0; i<ai[mbs]-1; i++ ) aj[i] -= 1;
402   }
403   for ( i=0; i<ai[mbs]+shift; i++ ) col[aj[i]] += 1;
404   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
405   PetscFree(col);
406   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
407   cols = rows + bs;
408   for ( i=0; i<mbs; i++ ) {
409     cols[0] = i*bs;
410     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
411     len = ai[i+1] - ai[i];
412     for ( j=0; j<len; j++ ) {
413       rows[0] = (*aj++)*bs;
414       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
415       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
416       array += bs2;
417     }
418   }
419   PetscFree(rows);
420   if (shift) {
421     for ( i=0; i<ai[mbs]-1; i++ ) aj[i] += 1;
422   }
423 
424   ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
425   ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
426 
427   if (B != PETSC_NULL) {
428     *B = C;
429   } else {
430     /* This isn't really an in-place transpose */
431     PetscFree(a->a);
432     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
433     if (a->diag) PetscFree(a->diag);
434     if (a->ilen) PetscFree(a->ilen);
435     if (a->imax) PetscFree(a->imax);
436     if (a->solve_work) PetscFree(a->solve_work);
437     PetscFree(a);
438     PetscMemcpy(A,C,sizeof(struct _Mat));
439     PetscHeaderDestroy(C);
440   }
441   return 0;
442 }
443 
444 
445 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
446 {
447   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
448   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
449   int        m = a->m, *ip, N, *ailen = a->ilen,shift = a->indexshift;
450   int        bs = a->bs,  mbs = a->mbs, bs2 = bs*bs;
451   Scalar     *aa = a->a, *ap;
452 
453   if (mode == FLUSH_ASSEMBLY) return 0;
454 
455   for ( i=1; i<mbs; i++ ) {
456     /* move each row back by the amount of empty slots (fshift) before it*/
457     fshift += imax[i-1] - ailen[i-1];
458     if (fshift) {
459       ip = aj + ai[i] + shift; ap = aa + bs2*ai[i] + shift;
460       N = ailen[i];
461       for ( j=0; j<N; j++ ) {
462         ip[j-fshift] = ip[j];
463         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
464       }
465     }
466     ai[i] = ai[i-1] + ailen[i-1];
467   }
468   if (mbs) {
469     fshift += imax[mbs-1] - ailen[mbs-1];
470     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
471   }
472   /* reset ilen and imax for each row */
473   for ( i=0; i<mbs; i++ ) {
474     ailen[i] = imax[i] = ai[i+1] - ai[i];
475   }
476   a->nz = (ai[m] + shift)*bs2;
477 
478   /* diagonals may have moved, so kill the diagonal pointers */
479   if (fshift && a->diag) {
480     PetscFree(a->diag);
481     PLogObjectMemory(A,-(m+1)*sizeof(int));
482     a->diag = 0;
483   }
484   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneeded storage space %d used %d rows %d\n",
485            fshift,a->nz,m);
486   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n",
487            a->reallocs);
488   return 0;
489 }
490 
491 static int MatZeroEntries_SeqBAIJ(Mat A)
492 {
493   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
494   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
495   return 0;
496 }
497 
498 int MatDestroy_SeqBAIJ(PetscObject obj)
499 {
500   Mat         A  = (Mat) obj;
501   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
502 
503 #if defined(PETSC_LOG)
504   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
505 #endif
506   PetscFree(a->a);
507   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
508   if (a->diag) PetscFree(a->diag);
509   if (a->ilen) PetscFree(a->ilen);
510   if (a->imax) PetscFree(a->imax);
511   if (a->solve_work) PetscFree(a->solve_work);
512   if (a->mult_work) PetscFree(a->mult_work);
513   PetscFree(a);
514   PLogObjectDestroy(A);
515   PetscHeaderDestroy(A);
516   return 0;
517 }
518 
519 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
520 {
521   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
522   if      (op == ROW_ORIENTED)              a->roworiented = 1;
523   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
524   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
525   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
526   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
527   else if (op == ROWS_SORTED ||
528            op == SYMMETRIC_MATRIX ||
529            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
530            op == YES_NEW_DIAGONALS)
531     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
532   else if (op == NO_NEW_DIAGONALS)
533     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
534   else
535     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
536   return 0;
537 }
538 
539 
540 /* -------------------------------------------------------*/
541 /* Should check that shapes of vectors and matrices match */
542 /* -------------------------------------------------------*/
543 #include "pinclude/plapack.h"
544 
545 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
546 {
547   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
548   Scalar          *xg,*yg;
549   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
550   register Scalar x1,x2,x3,x4,x5;
551   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
552   int             bs = a->bs,j,n,bs2 = bs*bs;
553 
554   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
555   PetscMemzero(y,m*sizeof(Scalar));
556   x     = x;
557   idx   = a->j;
558   v     = a->a;
559   ii    = a->i;
560 
561   switch (bs) {
562     case 1:
563      for ( i=0; i<m; i++ ) {
564        n    = ii[1] - ii[0]; ii++;
565        sum  = 0.0;
566        while (n--) sum += *v++ * x[*idx++];
567        y[i] = sum;
568       }
569       break;
570     case 2:
571       for ( i=0; i<mbs; i++ ) {
572         n  = ii[1] - ii[0]; ii++;
573         sum1 = 0.0; sum2 = 0.0;
574         for ( j=0; j<n; j++ ) {
575           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
576           sum1 += v[0]*x1 + v[2]*x2;
577           sum2 += v[1]*x1 + v[3]*x2;
578           v += 4;
579         }
580         y[0] += sum1; y[1] += sum2;
581         y += 2;
582       }
583       break;
584     case 3:
585       for ( i=0; i<mbs; i++ ) {
586         n  = ii[1] - ii[0]; ii++;
587         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
588         for ( j=0; j<n; j++ ) {
589           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
590           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
591           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
592           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
593           v += 9;
594         }
595         y[0] += sum1; y[1] += sum2; y[2] += sum3;
596         y += 3;
597       }
598       break;
599     case 4:
600       for ( i=0; i<mbs; i++ ) {
601         n  = ii[1] - ii[0]; ii++;
602         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
603         for ( j=0; j<n; j++ ) {
604           xb = x + 4*(*idx++);
605           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
606           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
607           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
608           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
609           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
610           v += 16;
611         }
612         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
613         y += 4;
614       }
615       break;
616     case 5:
617       for ( i=0; i<mbs; i++ ) {
618         n  = ii[1] - ii[0]; ii++;
619         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
620         for ( j=0; j<n; j++ ) {
621           xb = x + 5*(*idx++);
622           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
623           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
624           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
625           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
626           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
627           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
628           v += 25;
629         }
630         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
631         y += 5;
632       }
633       break;
634       /* block sizes larger then 5 by 5 are handled by BLAS */
635     default: {
636       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
637       if (!a->mult_work) {
638         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
639         CHKPTRQ(a->mult_work);
640       }
641       work = a->mult_work;
642       for ( i=0; i<mbs; i++ ) {
643         n     = ii[1] - ii[0]; ii++;
644         ncols = n*bs;
645         workt = work;
646         for ( j=0; j<n; j++ ) {
647           xb = x + bs*(*idx++);
648           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
649           workt += bs;
650         }
651         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
652         v += n*bs2;
653         y += bs;
654       }
655     }
656   }
657   PLogFlops(2*bs2*a->nz - m);
658   return 0;
659 }
660 
661 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
662 {
663   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
664   if (nz)  *nz  = a->bs*a->bs*a->nz;
665   if (nza) *nza = a->maxnz;
666   if (mem) *mem = (int)A->mem;
667   return 0;
668 }
669 
670 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
671 {
672   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
673 
674   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
675 
676   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
677   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
678       (a->nz != b->nz)||(a->indexshift != b->indexshift)) {
679     *flg = PETSC_FALSE; return 0;
680   }
681 
682   /* if the a->i are the same */
683   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
684     *flg = PETSC_FALSE; return 0;
685   }
686 
687   /* if a->j are the same */
688   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
689     *flg = PETSC_FALSE; return 0;
690   }
691 
692   /* if a->a are the same */
693   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
694     *flg = PETSC_FALSE; return 0;
695   }
696   *flg = PETSC_TRUE;
697   return 0;
698 
699 }
700 
701 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
702 {
703   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
704   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
705   Scalar      *x, zero = 0.0,*aa,*aa_j;
706 
707   bs   = a->bs;
708   aa   = a->a;
709   ai   = a->i;
710   aj   = a->j;
711   ambs = a->mbs;
712   bs2  = bs*bs;
713 
714   VecSet(&zero,v);
715   VecGetArray(v,&x); VecGetLocalSize(v,&n);
716   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
717   for ( i=0; i<ambs; i++ ) {
718     for ( j=ai[i]; j<ai[i+1]; j++ ) {
719       if (aj[j] == i) {
720         row  = i*bs;
721         aa_j = aa+j*bs2;
722         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
723         break;
724       }
725     }
726   }
727   return 0;
728 }
729 
730 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
731 {
732   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
733   Scalar      *l,*r,x,*v,*aa,*li,*ri;
734   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
735 
736   ai  = a->i;
737   aj  = a->j;
738   aa  = a->a;
739   m   = a->m;
740   n   = a->n;
741   bs  = a->bs;
742   mbs = a->mbs;
743   bs2 = bs*bs;
744   if (ll) {
745     VecGetArray(ll,&l); VecGetSize(ll,&lm);
746     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
747     for ( i=0; i<mbs; i++ ) { /* for each block row */
748       M  = ai[i+1] - ai[i];
749       li = l + i*bs;
750       v  = aa + bs2*ai[i];
751       for ( j=0; j<M; j++ ) { /* for each block */
752         for ( k=0; k<bs2; k++ ) {
753           (*v++) *= li[k%bs];
754         }
755       }
756     }
757   }
758 
759   if (rr) {
760     VecGetArray(rr,&r); VecGetSize(rr,&rn);
761     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
762     for ( i=0; i<mbs; i++ ) { /* for each block row */
763       M  = ai[i+1] - ai[i];
764       v  = aa + bs2*ai[i];
765       for ( j=0; j<M; j++ ) { /* for each block */
766         ri = r + bs*aj[ai[i]+j];
767         for ( k=0; k<bs; k++ ) {
768           x = ri[k];
769           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
770         }
771       }
772     }
773   }
774   return 0;
775 }
776 
777 
778 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
779 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
780 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
781 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
782 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
783 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
784 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
785 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
786 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
787 
788 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
789 {
790   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
791   Scalar      *v = a->a;
792   double      sum = 0.0;
793   int         i;
794 
795   if (type == NORM_FROBENIUS) {
796     for (i=0; i<a->nz; i++ ) {
797 #if defined(PETSC_COMPLEX)
798       sum += real(conj(*v)*(*v)); v++;
799 #else
800       sum += (*v)*(*v); v++;
801 #endif
802     }
803     *norm = sqrt(sum);
804   }
805   else {
806     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
807   }
808   return 0;
809 }
810 
811 /*
812      note: This can only work for identity for row and col. It would
813    be good to check this and otherwise generate an error.
814 */
815 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
816 {
817   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
818   Mat         outA;
819   int         ierr;
820 
821   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
822 
823   outA          = inA;
824   inA->factor   = FACTOR_LU;
825   a->row        = row;
826   a->col        = col;
827 
828   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
829 
830   if (!a->diag) {
831     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
832   }
833   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
834   return 0;
835 }
836 
837 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
838 {
839   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
840   int         one = 1, totalnz = a->bs*a->bs*a->nz;
841   BLscal_( &totalnz, alpha, a->a, &one );
842   PLogFlops(totalnz);
843   return 0;
844 }
845 
846 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
847 {
848   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
849   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
850   int        *ai = a->i, *ailen = a->ilen, shift = a->indexshift;
851   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=bs*bs;
852   Scalar     *ap, *aa = a->a, zero = 0.0;
853 
854   for ( k=0; k<m; k++ ) { /* loop over rows */
855     row  = im[k]; brow = row/bs;
856     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
857     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
858     rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
859     nrow = ailen[brow];
860     for ( l=0; l<n; l++ ) { /* loop over columns */
861       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
862       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
863       col  = in[l] - shift;
864       bcol = col/bs;
865       cidx = col%bs;
866       ridx = row%bs;
867       high = nrow;
868       low  = 0; /* assume unsorted */
869       while (high-low > 5) {
870         t = (low+high)/2;
871         if (rp[t] > bcol) high = t;
872         else             low  = t;
873       }
874       for ( i=low; i<high; i++ ) {
875         if (rp[i] > bcol) break;
876         if (rp[i] == bcol) {
877           *v++ = ap[bs2*i+bs*cidx+ridx];
878           goto finished;
879         }
880       }
881       *v++ = zero;
882       finished:;
883     }
884   }
885   return 0;
886 }
887 
888 int MatPrintHelp_SeqBAIJ(Mat A)
889 {
890   static int called = 0;
891 
892   if (called) return 0; else called = 1;
893   return 0;
894 }
895 /* -------------------------------------------------------------------*/
896 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
897        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
898        MatMult_SeqBAIJ,0,
899        0,0,
900        MatSolve_SeqBAIJ,0,
901        0,0,
902        MatLUFactor_SeqBAIJ,0,
903        0,
904        MatTranspose_SeqBAIJ,
905        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
906        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
907        0,MatAssemblyEnd_SeqBAIJ,
908        0,
909        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
910        MatGetReordering_SeqBAIJ,
911        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
912        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
913        MatILUFactorSymbolic_SeqBAIJ,0,
914        0,0,/*  MatConvert_SeqBAIJ  */ 0,
915        0,0,
916        MatConvertSameType_SeqBAIJ,0,0,
917        MatILUFactor_SeqBAIJ,0,0,
918        0,0,
919        MatGetValues_SeqBAIJ,0,
920        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
921        0};
922 
923 /*@C
924    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
925    (the default parallel PETSc format).  For good matrix assembly performance
926    the user should preallocate the matrix storage by setting the parameter nz
927    (or nzz).  By setting these parameters accurately, performance can be
928    increased by more than a factor of 50.
929 
930    Input Parameters:
931 .  comm - MPI communicator, set to MPI_COMM_SELF
932 .  bs - size of block
933 .  m - number of rows
934 .  n - number of columns
935 .  nz - number of block nonzeros per block row (same for all rows)
936 .  nzz - number of block nonzeros per block row or PETSC_NULL
937          (possibly different for each row)
938 
939    Output Parameter:
940 .  A - the matrix
941 
942    Notes:
943    The BAIJ format, is fully compatible with standard Fortran 77
944    storage.  That is, the stored row and column indices can begin at
945    either one (as in Fortran) or zero.  See the users manual for details.
946 
947    Specify the preallocated storage with either nz or nnz (not both).
948    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
949    allocation.  For additional details, see the users manual chapter on
950    matrices and the file $(PETSC_DIR)/Performance.
951 
952 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
953 @*/
954 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
955 {
956   Mat         B;
957   Mat_SeqBAIJ *b;
958   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
959 
960   if (mbs*bs!=m || nbs*bs!=n)
961     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
962 
963   *A = 0;
964   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
965   PLogObjectCreate(B);
966   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
967   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
968   switch (bs) {
969     case 1:
970       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
971       break;
972     case 2:
973       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
974       break;
975     case 3:
976       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
977       break;
978     case 4:
979       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
980       break;
981     case 5:
982       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
983       break;
984   }
985   B->destroy          = MatDestroy_SeqBAIJ;
986   B->view             = MatView_SeqBAIJ;
987   B->factor           = 0;
988   B->lupivotthreshold = 1.0;
989   b->row              = 0;
990   b->col              = 0;
991   b->reallocs         = 0;
992 
993   b->m       = m;
994   b->mbs     = mbs;
995   b->n       = n;
996   b->nbs     = nbs;
997   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
998   if (nnz == PETSC_NULL) {
999     if (nz == PETSC_DEFAULT) nz = CHUNKSIZE;
1000     else if (nz <= 0)        nz = 1;
1001     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1002     nz = nz*mbs;
1003   }
1004   else {
1005     nz = 0;
1006     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1007   }
1008 
1009   /* allocate the matrix space */
1010   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1011   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1012   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1013   b->j  = (int *) (b->a + nz*bs2);
1014   PetscMemzero(b->j,nz*sizeof(int));
1015   b->i  = b->j + nz;
1016   b->singlemalloc = 1;
1017 
1018   b->i[0] = 0;
1019   for (i=1; i<mbs+1; i++) {
1020     b->i[i] = b->i[i-1] + b->imax[i-1];
1021   }
1022 
1023   /* b->ilen will count nonzeros in each block row so far. */
1024   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1025   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1026   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1027 
1028   b->bs               = bs;
1029   b->mbs              = mbs;
1030   b->nz               = 0;
1031   b->maxnz            = nz;
1032   b->sorted           = 0;
1033   b->roworiented      = 1;
1034   b->nonew            = 0;
1035   b->diag             = 0;
1036   b->solve_work       = 0;
1037   b->mult_work        = 0;
1038   b->spptr            = 0;
1039   b->indexshift       = 0;
1040   *A = B;
1041   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1042   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1043   return 0;
1044 }
1045 
1046 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1047 {
1048   Mat         C;
1049   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1050   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz,bs2 = bs*bs;
1051 
1052   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1053 
1054   *B = 0;
1055   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1056   PLogObjectCreate(C);
1057   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1058   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1059   C->destroy    = MatDestroy_SeqBAIJ;
1060   C->view       = MatView_SeqBAIJ;
1061   C->factor     = A->factor;
1062   c->row        = 0;
1063   c->col        = 0;
1064   C->assembled  = PETSC_TRUE;
1065 
1066   c->m          = a->m;
1067   c->n          = a->n;
1068   c->bs         = a->bs;
1069   c->mbs        = a->mbs;
1070   c->nbs        = a->nbs;
1071   c->indexshift = 0;
1072 
1073   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1074   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1075   for ( i=0; i<mbs; i++ ) {
1076     c->imax[i] = a->imax[i];
1077     c->ilen[i] = a->ilen[i];
1078   }
1079 
1080   /* allocate the matrix space */
1081   c->singlemalloc = 1;
1082   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1083   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1084   c->j  = (int *) (c->a + nz*bs2);
1085   c->i  = c->j + nz;
1086   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1087   if (mbs > 0) {
1088     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1089     if (cpvalues == COPY_VALUES) {
1090       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1091     }
1092   }
1093 
1094   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1095   c->sorted      = a->sorted;
1096   c->roworiented = a->roworiented;
1097   c->nonew       = a->nonew;
1098 
1099   if (a->diag) {
1100     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1101     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1102     for ( i=0; i<mbs; i++ ) {
1103       c->diag[i] = a->diag[i];
1104     }
1105   }
1106   else c->diag          = 0;
1107   c->nz                 = a->nz;
1108   c->maxnz              = a->maxnz;
1109   c->solve_work         = 0;
1110   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1111   c->mult_work          = 0;
1112   *B = C;
1113   return 0;
1114 }
1115 
1116 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1117 {
1118   Mat_SeqBAIJ  *a;
1119   Mat          B;
1120   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1121   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1122   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1123   int          *masked, nmask,tmp,ishift,bs2;
1124   Scalar       *aa;
1125   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1126 
1127   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1128   bs2  = bs*bs;
1129 
1130   MPI_Comm_size(comm,&size);
1131   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1132   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1133   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1134   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1135   M = header[1]; N = header[2]; nz = header[3];
1136 
1137   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1138 
1139   /*
1140      This code adds extra rows to make sure the number of rows is
1141     divisible by the blocksize
1142   */
1143   mbs        = M/bs;
1144   extra_rows = bs - M + bs*(mbs);
1145   if (extra_rows == bs) extra_rows = 0;
1146   else                  mbs++;
1147   if (extra_rows) {
1148     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
1149   }
1150 
1151   /* read in row lengths */
1152   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1153   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1154   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1155 
1156   /* read in column indices */
1157   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1158   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1159   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1160 
1161   /* loop over row lengths determining block row lengths */
1162   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1163   PetscMemzero(browlengths,mbs*sizeof(int));
1164   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1165   PetscMemzero(mask,mbs*sizeof(int));
1166   masked = mask + mbs;
1167   rowcount = 0; nzcount = 0;
1168   for ( i=0; i<mbs; i++ ) {
1169     nmask = 0;
1170     for ( j=0; j<bs; j++ ) {
1171       kmax = rowlengths[rowcount];
1172       for ( k=0; k<kmax; k++ ) {
1173         tmp = jj[nzcount++]/bs;
1174         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1175       }
1176       rowcount++;
1177     }
1178     browlengths[i] += nmask;
1179     /* zero out the mask elements we set */
1180     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1181   }
1182 
1183   /* create our matrix */
1184   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1185          CHKERRQ(ierr);
1186   B = *A;
1187   a = (Mat_SeqBAIJ *) B->data;
1188 
1189   /* set matrix "i" values */
1190   a->i[0] = 0;
1191   for ( i=1; i<= mbs; i++ ) {
1192     a->i[i]      = a->i[i-1] + browlengths[i-1];
1193     a->ilen[i-1] = browlengths[i-1];
1194   }
1195   a->indexshift = 0;
1196   a->nz         = 0;
1197   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1198 
1199   /* read in nonzero values */
1200   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1201   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
1202   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1203 
1204   /* set "a" and "j" values into matrix */
1205   nzcount = 0; jcount = 0;
1206   for ( i=0; i<mbs; i++ ) {
1207     nzcountb = nzcount;
1208     nmask    = 0;
1209     for ( j=0; j<bs; j++ ) {
1210       kmax = rowlengths[i*bs+j];
1211       for ( k=0; k<kmax; k++ ) {
1212         tmp = jj[nzcount++]/bs;
1213 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1214       }
1215       rowcount++;
1216     }
1217     /* sort the masked values */
1218     PetscSortInt(nmask,masked);
1219 
1220     /* set "j" values into matrix */
1221     maskcount = 1;
1222     for ( j=0; j<nmask; j++ ) {
1223       a->j[jcount++]  = masked[j];
1224       mask[masked[j]] = maskcount++;
1225     }
1226     /* set "a" values into matrix */
1227     ishift = bs2*a->i[i];
1228     for ( j=0; j<bs; j++ ) {
1229       kmax = rowlengths[i*bs+j];
1230       for ( k=0; k<kmax; k++ ) {
1231         tmp    = jj[nzcountb]/bs ;
1232         block  = mask[tmp] - 1;
1233         point  = jj[nzcountb] - bs*tmp;
1234         idx    = ishift + bs2*block + j + bs*point;
1235         a->a[idx] = aa[nzcountb++];
1236       }
1237     }
1238     /* zero out the mask elements we set */
1239     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1240   }
1241   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1242 
1243   PetscFree(rowlengths);
1244   PetscFree(browlengths);
1245   PetscFree(aa);
1246   PetscFree(jj);
1247   PetscFree(mask);
1248 
1249   B->assembled = PETSC_TRUE;
1250 
1251   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1252   if (flg) {
1253     Viewer tviewer;
1254     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1255     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
1256     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1257     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1258   }
1259   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1260   if (flg) {
1261     Viewer tviewer;
1262     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1263     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
1264     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1265     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1266   }
1267   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1268   if (flg) {
1269     Viewer tviewer;
1270     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1271     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1272     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1273   }
1274   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1275   if (flg) {
1276     Viewer tviewer;
1277     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1278     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
1279     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1280     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1281   }
1282   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1283   if (flg) {
1284     Viewer tviewer;
1285     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
1286     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
1287     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
1288     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1289   }
1290   return 0;
1291 }
1292 
1293 
1294 
1295