xref: /petsc/src/mat/impls/baij/seq/baij.c (revision f2501298210485f1203ef904b88cfc58e030e56a)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.27 1996/04/05 16:20:05 balay Exp balay $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = a->indexshift;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45     CHKERRQ(ierr);
46     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
47     PetscFree(ia); PetscFree(ja);
48   } else {
49     if (ishift == oshift) {
50       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51     }
52     else if (ishift == -1) {
53       /* temporarily subtract 1 from i and j indices */
54       int nz = a->i[a->n] - 1;
55       for ( i=0; i<nz; i++ ) a->j[i]--;
56       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58       for ( i=0; i<nz; i++ ) a->j[i]++;
59       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60     } else {
61       /* temporarily add 1 to i and j indices */
62       int nz = a->i[a->n] - 1;
63       for ( i=0; i<nz; i++ ) a->j[i]++;
64       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66       for ( i=0; i<nz; i++ ) a->j[i]--;
67       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68     }
69   }
70   return 0;
71 }
72 
73 /*
74      Adds diagonal pointers to sparse matrix structure.
75 */
76 
77 int MatMarkDiag_SeqBAIJ(Mat A)
78 {
79   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80   int         i,j, *diag, m = a->mbs;
81 
82   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83   PLogObjectMemory(A,(m+1)*sizeof(int));
84   for ( i=0; i<m; i++ ) {
85     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86       if (a->j[j] == i) {
87         diag[i] = j;
88         break;
89       }
90     }
91   }
92   a->diag = diag;
93   return 0;
94 }
95 
96 #include "draw.h"
97 #include "pinclude/pviewer.h"
98 #include "sys.h"
99 
100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
101 {
102   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
103   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=bs*bs;
104   Scalar      *aa;
105 
106   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
107   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
108   col_lens[0] = MAT_COOKIE;
109   col_lens[1] = a->m;
110   col_lens[2] = a->n;
111   col_lens[3] = a->nz*bs2;
112 
113   /* store lengths of each row and write (including header) to file */
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118     }
119   }
120   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
121   PetscFree(col_lens);
122 
123   /* store column indices (zero start index) */
124   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
125   count = 0;
126   for ( i=0; i<a->mbs; i++ ) {
127     for ( j=0; j<bs; j++ ) {
128       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129         for ( l=0; l<bs; l++ ) {
130           jj[count++] = bs*a->j[k] + l;
131         }
132       }
133     }
134   }
135   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
136   PetscFree(jj);
137 
138   /* store nonzero values */
139   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
140   count = 0;
141   for ( i=0; i<a->mbs; i++ ) {
142     for ( j=0; j<bs; j++ ) {
143       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144         for ( l=0; l<bs; l++ ) {
145           aa[count++] = a->a[bs2*k + l*bs + j];
146         }
147       }
148     }
149   }
150   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
151   PetscFree(aa);
152   return 0;
153 }
154 
155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
156 {
157   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
158   int         ierr, i,j,format,bs = a->bs,k,l,bs2=bs*bs;
159   FILE        *fd;
160   char        *outputname;
161 
162   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
163   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
164   ierr = ViewerGetFormat(viewer,&format);
165   if (format == ASCII_FORMAT_INFO) {
166     /* no need to print additional information */ ;
167   }
168   else if (format == ASCII_FORMAT_MATLAB) {
169     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
170   }
171   else {
172     for ( i=0; i<a->mbs; i++ ) {
173       for ( j=0; j<bs; j++ ) {
174         fprintf(fd,"row %d:",i*bs+j);
175         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
176           for ( l=0; l<bs; l++ ) {
177 #if defined(PETSC_COMPLEX)
178           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
179             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
180               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
181           }
182           else {
183             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
184           }
185 #else
186             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
187 #endif
188           }
189         }
190         fprintf(fd,"\n");
191       }
192     }
193   }
194   fflush(fd);
195   return 0;
196 }
197 
198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
199 {
200   Mat         A = (Mat) obj;
201   ViewerType  vtype;
202   int         ierr;
203 
204   if (!viewer) {
205     viewer = STDOUT_VIEWER_SELF;
206   }
207 
208   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
209   if (vtype == MATLAB_VIEWER) {
210     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
211   }
212   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
213     return MatView_SeqBAIJ_ASCII(A,viewer);
214   }
215   else if (vtype == BINARY_FILE_VIEWER) {
216     return MatView_SeqBAIJ_Binary(A,viewer);
217   }
218   else if (vtype == DRAW_VIEWER) {
219     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
220   }
221   return 0;
222 }
223 
224 #define CHUNKSIZE  1
225 
226 /* This version has row oriented v  */
227 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
228 {
229   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
230   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
231   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
232   int         *aj=a->j,nonew=a->nonew,shift=a->indexshift,bs=a->bs,brow,bcol;
233   int          ridx,cidx,bs2=bs*bs;
234   Scalar      *ap,value,*aa=a->a,*bap;
235 
236   for ( k=0; k<m; k++ ) { /* loop over added rows */
237     row  = im[k]; brow = row/bs;
238     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
239     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
240     rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
241     rmax = imax[brow]; nrow = ailen[brow];
242     low = 0;
243     for ( l=0; l<n; l++ ) { /* loop over added columns */
244       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
245       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
246       col = in[l] - shift; bcol = col/bs;
247       ridx = row % bs; cidx = col % bs;
248       if (roworiented) {
249         value = *v++;
250       }
251       else {
252         value = v[k + l*m];
253       }
254       if (!sorted) low = 0; high = nrow;
255       while (high-low > 5) {
256         t = (low+high)/2;
257         if (rp[t] > bcol) high = t;
258         else             low  = t;
259       }
260       for ( i=low; i<high; i++ ) {
261         if (rp[i] > bcol) break;
262         if (rp[i] == bcol) {
263           bap  = ap +  bs2*i + bs*cidx + ridx;
264           if (is == ADD_VALUES) *bap += value;
265           else                  *bap  = value;
266           goto noinsert;
267         }
268       }
269       if (nonew) goto noinsert;
270       if (nrow >= rmax) {
271         /* there is no extra room in row, therefore enlarge */
272         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
273         Scalar *new_a;
274 
275         /* malloc new storage space */
276         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
277         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
278         new_j   = (int *) (new_a + bs2*new_nz);
279         new_i   = new_j + new_nz;
280 
281         /* copy over old data into new slots */
282         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
283         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
284         PetscMemcpy(new_j,aj,(ai[brow]+nrow+shift)*sizeof(int));
285         len = (new_nz - CHUNKSIZE - ai[brow] - nrow - shift);
286         PetscMemcpy(new_j+ai[brow]+shift+nrow+CHUNKSIZE,aj+ai[brow]+shift+nrow,
287                                                            len*sizeof(int));
288         PetscMemcpy(new_a,aa,(ai[brow]+nrow+shift)*bs2*sizeof(Scalar));
289         PetscMemzero(new_a+bs2*(ai[brow]+shift+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
290         PetscMemcpy(new_a+bs2*(ai[brow]+shift+nrow+CHUNKSIZE),
291                     aa+bs2*(ai[brow]+shift+nrow),bs2*len*sizeof(Scalar));
292         /* free up old matrix storage */
293         PetscFree(a->a);
294         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
295         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
296         a->singlemalloc = 1;
297 
298         rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
299         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
300         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
301         a->maxnz += bs2*CHUNKSIZE;
302         a->reallocs++;
303         a->nz+=bs2;
304       }
305       N = nrow++ - 1;
306       /* shift up all the later entries in this row */
307       for ( ii=N; ii>=i; ii-- ) {
308         rp[ii+1] = rp[ii];
309          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
310       }
311       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
312       rp[i] = bcol;
313       ap[bs2*i + bs*cidx + ridx] = value;
314       noinsert:;
315       low = i;
316     }
317     ailen[brow] = nrow;
318   }
319   return 0;
320 }
321 
322 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
323 {
324   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
325   *m = a->m; *n = a->n;
326   return 0;
327 }
328 
329 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
330 {
331   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
332   *m = 0; *n = a->m;
333   return 0;
334 }
335 
336 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
337 {
338   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
339   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
340   Scalar      *aa,*v_i,*aa_i;
341 
342   bs  = a->bs;
343   ai  = a->i;
344   aj  = a->j;
345   aa  = a->a;
346   bs2 = bs*bs;
347 
348   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
349 
350   bn  = row/bs;   /* Block number */
351   bp  = row % bs; /* Block Position */
352   M   = ai[bn+1] - ai[bn];
353   *nz = bs*M;
354 
355   if (v) {
356     *v = 0;
357     if (*nz) {
358       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
359       for ( i=0; i<M; i++ ) { /* for each block in the block row */
360         v_i  = *v + i*bs;
361         aa_i = aa + bs2*(ai[bn] + i);
362         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
363       }
364     }
365   }
366 
367   if (idx) {
368     *idx = 0;
369     if (*nz) {
370       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
371       for ( i=0; i<M; i++ ) { /* for each block in the block row */
372         idx_i = *idx + i*bs;
373         itmp  = bs*aj[ai[bn] + i];
374         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
375       }
376     }
377   }
378   return 0;
379 }
380 
381 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
382 {
383   if (idx) {if (*idx) PetscFree(*idx);}
384   if (v)   {if (*v)   PetscFree(*v);}
385   return 0;
386 }
387 
388 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
389 {
390   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
391   Mat         C;
392   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
393   int         shift=a->indexshift,*rows,*cols,bs2=bs*bs;
394   Scalar      *array=a->a;
395 
396   if (B==PETSC_NULL && mbs!=nbs)
397     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
398   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
399   PetscMemzero(col,(1+nbs)*sizeof(int));
400   if (shift) {
401     for ( i=0; i<ai[mbs]-1; i++ ) aj[i] -= 1;
402   }
403   for ( i=0; i<ai[mbs]+shift; i++ ) col[aj[i]] += 1;
404   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
405   PetscFree(col);
406   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
407   cols = rows + bs;
408   for ( i=0; i<mbs; i++ ) {
409     cols[0] = i*bs;
410     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
411     len = ai[i+1] - ai[i];
412     for ( j=0; j<len; j++ ) {
413       rows[0] = (*aj++)*bs;
414       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
415       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
416       array += bs2;
417     }
418   }
419   if (shift) {
420     for ( i=0; i<ai[mbs]-1; i++ ) aj[i] += 1;
421   }
422 
423   ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
424   ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
425 
426   if (B != PETSC_NULL) {
427     *B = C;
428   } else {
429     /* This isn't really an in-place transpose */
430     PetscFree(a->a);
431     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
432     if (a->diag) PetscFree(a->diag);
433     if (a->ilen) PetscFree(a->ilen);
434     if (a->imax) PetscFree(a->imax);
435     if (a->solve_work) PetscFree(a->solve_work);
436     PetscFree(a);
437     PetscMemcpy(A,C,sizeof(struct _Mat));
438     PetscHeaderDestroy(C);
439   }
440   return 0;
441 }
442 
443 
444 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
445 {
446   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
447   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
448   int        m = a->m, *ip, N, *ailen = a->ilen,shift = a->indexshift;
449   int        bs = a->bs,  mbs = a->mbs, bs2 = bs*bs;
450   Scalar     *aa = a->a, *ap;
451 
452   if (mode == FLUSH_ASSEMBLY) return 0;
453 
454   for ( i=1; i<mbs; i++ ) {
455     /* move each row back by the amount of empty slots (fshift) before it*/
456     fshift += imax[i-1] - ailen[i-1];
457     if (fshift) {
458       ip = aj + ai[i] + shift; ap = aa + bs2*ai[i] + shift;
459       N = ailen[i];
460       for ( j=0; j<N; j++ ) {
461         ip[j-fshift] = ip[j];
462         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
463       }
464     }
465     ai[i] = ai[i-1] + ailen[i-1];
466   }
467   if (mbs) {
468     fshift += imax[mbs-1] - ailen[mbs-1];
469     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
470   }
471   /* reset ilen and imax for each row */
472   for ( i=0; i<mbs; i++ ) {
473     ailen[i] = imax[i] = ai[i+1] - ai[i];
474   }
475   a->nz = (ai[m] + shift)*bs2;
476 
477   /* diagonals may have moved, so kill the diagonal pointers */
478   if (fshift && a->diag) {
479     PetscFree(a->diag);
480     PLogObjectMemory(A,-(m+1)*sizeof(int));
481     a->diag = 0;
482   }
483   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneeded storage space %d used %d rows %d\n",
484            fshift,a->nz,m);
485   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n",
486            a->reallocs);
487   return 0;
488 }
489 
490 static int MatZeroEntries_SeqBAIJ(Mat A)
491 {
492   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
493   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
494   return 0;
495 }
496 
497 int MatDestroy_SeqBAIJ(PetscObject obj)
498 {
499   Mat         A  = (Mat) obj;
500   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
501 
502 #if defined(PETSC_LOG)
503   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
504 #endif
505   PetscFree(a->a);
506   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
507   if (a->diag) PetscFree(a->diag);
508   if (a->ilen) PetscFree(a->ilen);
509   if (a->imax) PetscFree(a->imax);
510   if (a->solve_work) PetscFree(a->solve_work);
511   if (a->mult_work) PetscFree(a->mult_work);
512   PetscFree(a);
513   PLogObjectDestroy(A);
514   PetscHeaderDestroy(A);
515   return 0;
516 }
517 
518 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
519 {
520   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
521   if      (op == ROW_ORIENTED)              a->roworiented = 1;
522   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
523   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
524   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
525   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
526   else if (op == ROWS_SORTED ||
527            op == SYMMETRIC_MATRIX ||
528            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
529            op == YES_NEW_DIAGONALS)
530     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
531   else if (op == NO_NEW_DIAGONALS)
532     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
533   else
534     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
535   return 0;
536 }
537 
538 
539 /* -------------------------------------------------------*/
540 /* Should check that shapes of vectors and matrices match */
541 /* -------------------------------------------------------*/
542 #include "pinclude/plapack.h"
543 
544 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
545 {
546   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
547   Scalar          *xg,*yg;
548   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
549   register Scalar x1,x2,x3,x4,x5;
550   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
551   int             bs = a->bs,j,n,bs2 = bs*bs;
552 
553   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
554   PetscMemzero(y,m*sizeof(Scalar));
555   x     = x;
556   idx   = a->j;
557   v     = a->a;
558   ii    = a->i;
559 
560   switch (bs) {
561     case 1:
562      for ( i=0; i<m; i++ ) {
563        n    = ii[1] - ii[0]; ii++;
564        sum  = 0.0;
565        while (n--) sum += *v++ * x[*idx++];
566        y[i] = sum;
567       }
568       break;
569     case 2:
570       for ( i=0; i<mbs; i++ ) {
571         n  = ii[1] - ii[0]; ii++;
572         sum1 = 0.0; sum2 = 0.0;
573         for ( j=0; j<n; j++ ) {
574           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
575           sum1 += v[0]*x1 + v[2]*x2;
576           sum2 += v[1]*x1 + v[3]*x2;
577           v += 4;
578         }
579         y[0] += sum1; y[1] += sum2;
580         y += 2;
581       }
582       break;
583     case 3:
584       for ( i=0; i<mbs; i++ ) {
585         n  = ii[1] - ii[0]; ii++;
586         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
587         for ( j=0; j<n; j++ ) {
588           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
589           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
590           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
591           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
592           v += 9;
593         }
594         y[0] += sum1; y[1] += sum2; y[2] += sum3;
595         y += 3;
596       }
597       break;
598     case 4:
599       for ( i=0; i<mbs; i++ ) {
600         n  = ii[1] - ii[0]; ii++;
601         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
602         for ( j=0; j<n; j++ ) {
603           xb = x + 4*(*idx++);
604           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
605           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
606           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
607           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
608           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
609           v += 16;
610         }
611         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
612         y += 4;
613       }
614       break;
615     case 5:
616       for ( i=0; i<mbs; i++ ) {
617         n  = ii[1] - ii[0]; ii++;
618         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
619         for ( j=0; j<n; j++ ) {
620           xb = x + 5*(*idx++);
621           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
622           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
623           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
624           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
625           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
626           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
627           v += 25;
628         }
629         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
630         y += 5;
631       }
632       break;
633       /* block sizes larger then 5 by 5 are handled by BLAS */
634     default: {
635       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
636       if (!a->mult_work) {
637         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
638         CHKPTRQ(a->mult_work);
639       }
640       work = a->mult_work;
641       for ( i=0; i<mbs; i++ ) {
642         n     = ii[1] - ii[0]; ii++;
643         ncols = n*bs;
644         workt = work;
645         for ( j=0; j<n; j++ ) {
646           xb = x + bs*(*idx++);
647           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
648           workt += bs;
649         }
650         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
651         v += n*bs2;
652         y += bs;
653       }
654     }
655   }
656   PLogFlops(2*bs2*a->nz - m);
657   return 0;
658 }
659 
660 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
661 {
662   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
663   if (nz)  *nz  = a->bs*a->bs*a->nz;
664   if (nza) *nza = a->maxnz;
665   if (mem) *mem = (int)A->mem;
666   return 0;
667 }
668 
669 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
670 {
671   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
672 
673   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
674 
675   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
676   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
677       (a->nz != b->nz)||(a->indexshift != b->indexshift)) {
678     *flg = PETSC_FALSE; return 0;
679   }
680 
681   /* if the a->i are the same */
682   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
683     *flg = PETSC_FALSE; return 0;
684   }
685 
686   /* if a->j are the same */
687   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
688     *flg = PETSC_FALSE; return 0;
689   }
690 
691   /* if a->a are the same */
692   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
693     *flg = PETSC_FALSE; return 0;
694   }
695   *flg = PETSC_TRUE;
696   return 0;
697 
698 }
699 
700 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
701 {
702   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
703   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
704   Scalar      *x, zero = 0.0,*aa,*aa_j;
705 
706   bs   = a->bs;
707   aa   = a->a;
708   ai   = a->i;
709   aj   = a->j;
710   ambs = a->mbs;
711   bs2  = bs*bs;
712 
713   VecSet(&zero,v);
714   VecGetArray(v,&x); VecGetLocalSize(v,&n);
715   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
716   for ( i=0; i<ambs; i++ ) {
717     for ( j=ai[i]; j<ai[i+1]; j++ ) {
718       if (aj[j] == i) {
719         row  = i*bs;
720         aa_j = aa+j*bs2;
721         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
722         break;
723       }
724     }
725   }
726   return 0;
727 }
728 
729 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
730 {
731   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
732   Scalar      *l,*r,x,*v,*aa,*li,*ri;
733   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
734 
735   ai  = a->i;
736   aj  = a->j;
737   aa  = a->a;
738   m   = a->m;
739   n   = a->n;
740   bs  = a->bs;
741   mbs = a->mbs;
742   bs2 = bs*bs;
743   if (ll) {
744     VecGetArray(ll,&l); VecGetSize(ll,&lm);
745     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
746     for ( i=0; i<mbs; i++ ) { /* for each block row */
747       M  = ai[i+1] - ai[i];
748       li = l + i*bs;
749       v  = aa + bs2*ai[i];
750       for ( j=0; j<M; j++ ) { /* for each block */
751         for ( k=0; k<bs2; k++ ) {
752           (*v++) *= li[k%bs];
753         }
754       }
755     }
756   }
757 
758   if (rr) {
759     VecGetArray(rr,&r); VecGetSize(rr,&rn);
760     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
761     for ( i=0; i<mbs; i++ ) { /* for each block row */
762       M  = ai[i+1] - ai[i];
763       v  = aa + bs2*ai[i];
764       for ( j=0; j<M; j++ ) { /* for each block */
765         ri = r + bs*aj[ai[i]+j];
766         for ( k=0; k<bs; k++ ) {
767           x = ri[k];
768           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
769         }
770       }
771     }
772   }
773   return 0;
774 }
775 
776 
777 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
778 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
779 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
780 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
781 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
782 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
783 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
784 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
785 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
786 
787 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
788 {
789   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
790   Scalar      *v = a->a;
791   double      sum = 0.0;
792   int         i;
793 
794   if (type == NORM_FROBENIUS) {
795     for (i=0; i<a->nz; i++ ) {
796 #if defined(PETSC_COMPLEX)
797       sum += real(conj(*v)*(*v)); v++;
798 #else
799       sum += (*v)*(*v); v++;
800 #endif
801     }
802     *norm = sqrt(sum);
803   }
804   else {
805     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
806   }
807   return 0;
808 }
809 
810 /*
811      note: This can only work for identity for row and col. It would
812    be good to check this and otherwise generate an error.
813 */
814 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
815 {
816   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
817   Mat         outA;
818   int         ierr;
819 
820   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
821 
822   outA          = inA;
823   inA->factor   = FACTOR_LU;
824   a->row        = row;
825   a->col        = col;
826 
827   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
828 
829   if (!a->diag) {
830     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
831   }
832   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
833   return 0;
834 }
835 
836 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
837 {
838   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
839   int         one = 1, totalnz = a->bs*a->bs*a->nz;
840   BLscal_( &totalnz, alpha, a->a, &one );
841   PLogFlops(totalnz);
842   return 0;
843 }
844 
845 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
846 {
847   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
848   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
849   int        *ai = a->i, *ailen = a->ilen, shift = a->indexshift;
850   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=bs*bs;
851   Scalar     *ap, *aa = a->a, zero = 0.0;
852 
853   for ( k=0; k<m; k++ ) { /* loop over rows */
854     row  = im[k]; brow = row/bs;
855     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
856     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
857     rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
858     nrow = ailen[brow];
859     for ( l=0; l<n; l++ ) { /* loop over columns */
860       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
861       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
862       col  = in[l] - shift;
863       bcol = col/bs;
864       cidx = col%bs;
865       ridx = row%bs;
866       high = nrow;
867       low  = 0; /* assume unsorted */
868       while (high-low > 5) {
869         t = (low+high)/2;
870         if (rp[t] > bcol) high = t;
871         else             low  = t;
872       }
873       for ( i=low; i<high; i++ ) {
874         if (rp[i] > bcol) break;
875         if (rp[i] == bcol) {
876           *v++ = ap[bs2*i+bs*cidx+ridx];
877           goto finished;
878         }
879       }
880       *v++ = zero;
881       finished:;
882     }
883   }
884   return 0;
885 }
886 
887 int MatPrintHelp_SeqBAIJ(Mat A)
888 {
889   static int called = 0;
890 
891   if (called) return 0; else called = 1;
892   return 0;
893 }
894 /* -------------------------------------------------------------------*/
895 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
896        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
897        MatMult_SeqBAIJ,0,
898        0,0,
899        MatSolve_SeqBAIJ,0,
900        0,0,
901        MatLUFactor_SeqBAIJ,0,
902        0,
903        MatTranspose_SeqBAIJ,
904        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
905        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
906        0,MatAssemblyEnd_SeqBAIJ,
907        0,
908        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
909        MatGetReordering_SeqBAIJ,
910        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
911        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
912        MatILUFactorSymbolic_SeqBAIJ,0,
913        0,0,/*  MatConvert_SeqBAIJ  */ 0,
914        0,0,
915        MatConvertSameType_SeqBAIJ,0,0,
916        MatILUFactor_SeqBAIJ,0,0,
917        0,0,
918        MatGetValues_SeqBAIJ,0,
919        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
920        0};
921 
922 /*@C
923    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
924    (the default parallel PETSc format).  For good matrix assembly performance
925    the user should preallocate the matrix storage by setting the parameter nz
926    (or nzz).  By setting these parameters accurately, performance can be
927    increased by more than a factor of 50.
928 
929    Input Parameters:
930 .  comm - MPI communicator, set to MPI_COMM_SELF
931 .  bs - size of block
932 .  m - number of rows
933 .  n - number of columns
934 .  nz - number of block nonzeros per block row (same for all rows)
935 .  nzz - number of block nonzeros per block row or PETSC_NULL
936          (possibly different for each row)
937 
938    Output Parameter:
939 .  A - the matrix
940 
941    Notes:
942    The BAIJ format, is fully compatible with standard Fortran 77
943    storage.  That is, the stored row and column indices can begin at
944    either one (as in Fortran) or zero.  See the users manual for details.
945 
946    Specify the preallocated storage with either nz or nnz (not both).
947    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
948    allocation.  For additional details, see the users manual chapter on
949    matrices and the file $(PETSC_DIR)/Performance.
950 
951 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
952 @*/
953 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
954 {
955   Mat         B;
956   Mat_SeqBAIJ *b;
957   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
958 
959   if (mbs*bs!=m || nbs*bs!=n)
960     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
961 
962   *A = 0;
963   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
964   PLogObjectCreate(B);
965   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
966   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
967   switch (bs) {
968     case 1:
969       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
970       break;
971     case 2:
972       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
973       break;
974     case 3:
975       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
976       break;
977     case 4:
978       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
979       break;
980     case 5:
981       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
982       break;
983   }
984   B->destroy          = MatDestroy_SeqBAIJ;
985   B->view             = MatView_SeqBAIJ;
986   B->factor           = 0;
987   B->lupivotthreshold = 1.0;
988   b->row              = 0;
989   b->col              = 0;
990   b->reallocs         = 0;
991 
992   b->m       = m;
993   b->mbs     = mbs;
994   b->n       = n;
995   b->nbs     = nbs;
996   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
997   if (nnz == PETSC_NULL) {
998     if (nz == PETSC_DEFAULT) nz = CHUNKSIZE;
999     else if (nz <= 0)        nz = 1;
1000     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1001     nz = nz*mbs;
1002   }
1003   else {
1004     nz = 0;
1005     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1006   }
1007 
1008   /* allocate the matrix space */
1009   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1010   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1011   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1012   b->j  = (int *) (b->a + nz*bs2);
1013   PetscMemzero(b->j,nz*sizeof(int));
1014   b->i  = b->j + nz;
1015   b->singlemalloc = 1;
1016 
1017   b->i[0] = 0;
1018   for (i=1; i<mbs+1; i++) {
1019     b->i[i] = b->i[i-1] + b->imax[i-1];
1020   }
1021 
1022   /* b->ilen will count nonzeros in each block row so far. */
1023   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1024   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1025   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1026 
1027   b->bs               = bs;
1028   b->mbs              = mbs;
1029   b->nz               = 0;
1030   b->maxnz            = nz;
1031   b->sorted           = 0;
1032   b->roworiented      = 1;
1033   b->nonew            = 0;
1034   b->diag             = 0;
1035   b->solve_work       = 0;
1036   b->mult_work        = 0;
1037   b->spptr            = 0;
1038 
1039   *A = B;
1040   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1041   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1042   return 0;
1043 }
1044 
1045 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1046 {
1047   Mat         C;
1048   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1049   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz,bs2 = bs*bs;
1050 
1051   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1052 
1053   *B = 0;
1054   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1055   PLogObjectCreate(C);
1056   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1057   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1058   C->destroy    = MatDestroy_SeqBAIJ;
1059   C->view       = MatView_SeqBAIJ;
1060   C->factor     = A->factor;
1061   c->row        = 0;
1062   c->col        = 0;
1063   C->assembled  = PETSC_TRUE;
1064 
1065   c->m          = a->m;
1066   c->n          = a->n;
1067   c->bs         = a->bs;
1068   c->mbs        = a->mbs;
1069   c->nbs        = a->nbs;
1070 
1071   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1072   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1073   for ( i=0; i<mbs; i++ ) {
1074     c->imax[i] = a->imax[i];
1075     c->ilen[i] = a->ilen[i];
1076   }
1077 
1078   /* allocate the matrix space */
1079   c->singlemalloc = 1;
1080   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1081   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1082   c->j  = (int *) (c->a + nz*bs2);
1083   c->i  = c->j + nz;
1084   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1085   if (mbs > 0) {
1086     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1087     if (cpvalues == COPY_VALUES) {
1088       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1089     }
1090   }
1091 
1092   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1093   c->sorted      = a->sorted;
1094   c->roworiented = a->roworiented;
1095   c->nonew       = a->nonew;
1096 
1097   if (a->diag) {
1098     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1099     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1100     for ( i=0; i<mbs; i++ ) {
1101       c->diag[i] = a->diag[i];
1102     }
1103   }
1104   else c->diag          = 0;
1105   c->nz                 = a->nz;
1106   c->maxnz              = a->maxnz;
1107   c->solve_work         = 0;
1108   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1109   c->mult_work          = 0;
1110   *B = C;
1111   return 0;
1112 }
1113 
1114 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1115 {
1116   Mat_SeqBAIJ  *a;
1117   Mat          B;
1118   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1119   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1120   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1121   int          *masked, nmask,tmp,ishift,bs2;
1122   Scalar       *aa;
1123   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1124 
1125   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1126   bs2  = bs*bs;
1127 
1128   MPI_Comm_size(comm,&size);
1129   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1130   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1131   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1132   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1133   M = header[1]; N = header[2]; nz = header[3];
1134 
1135   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1136 
1137   /*
1138      This code adds extra rows to make sure the number of rows is
1139     divisible by the blocksize
1140   */
1141   mbs        = M/bs;
1142   extra_rows = bs - M + bs*(mbs);
1143   if (extra_rows == bs) extra_rows = 0;
1144   else                  mbs++;
1145   if (extra_rows) {
1146     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
1147   }
1148 
1149   /* read in row lengths */
1150   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1151   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1152   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1153 
1154   /* read in column indices */
1155   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1156   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1157   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1158 
1159   /* loop over row lengths determining block row lengths */
1160   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1161   PetscMemzero(browlengths,mbs*sizeof(int));
1162   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1163   PetscMemzero(mask,mbs*sizeof(int));
1164   masked = mask + mbs;
1165   rowcount = 0; nzcount = 0;
1166   for ( i=0; i<mbs; i++ ) {
1167     nmask = 0;
1168     for ( j=0; j<bs; j++ ) {
1169       kmax = rowlengths[rowcount];
1170       for ( k=0; k<kmax; k++ ) {
1171         tmp = jj[nzcount++]/bs;
1172         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1173       }
1174       rowcount++;
1175     }
1176     browlengths[i] += nmask;
1177     /* zero out the mask elements we set */
1178     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1179   }
1180 
1181   /* create our matrix */
1182   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1183          CHKERRQ(ierr);
1184   B = *A;
1185   a = (Mat_SeqBAIJ *) B->data;
1186 
1187   /* set matrix "i" values */
1188   a->i[0] = 0;
1189   for ( i=1; i<= mbs; i++ ) {
1190     a->i[i]      = a->i[i-1] + browlengths[i-1];
1191     a->ilen[i-1] = browlengths[i-1];
1192   }
1193   a->indexshift = 0;
1194   a->nz         = 0;
1195   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1196 
1197   /* read in nonzero values */
1198   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1199   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
1200   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1201 
1202   /* set "a" and "j" values into matrix */
1203   nzcount = 0; jcount = 0;
1204   for ( i=0; i<mbs; i++ ) {
1205     nzcountb = nzcount;
1206     nmask    = 0;
1207     for ( j=0; j<bs; j++ ) {
1208       kmax = rowlengths[i*bs+j];
1209       for ( k=0; k<kmax; k++ ) {
1210         tmp = jj[nzcount++]/bs;
1211 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1212       }
1213       rowcount++;
1214     }
1215     /* sort the masked values */
1216     PetscSortInt(nmask,masked);
1217 
1218     /* set "j" values into matrix */
1219     maskcount = 1;
1220     for ( j=0; j<nmask; j++ ) {
1221       a->j[jcount++]  = masked[j];
1222       mask[masked[j]] = maskcount++;
1223     }
1224     /* set "a" values into matrix */
1225     ishift = bs2*a->i[i];
1226     for ( j=0; j<bs; j++ ) {
1227       kmax = rowlengths[i*bs+j];
1228       for ( k=0; k<kmax; k++ ) {
1229         tmp    = jj[nzcountb]/bs ;
1230         block  = mask[tmp] - 1;
1231         point  = jj[nzcountb] - bs*tmp;
1232         idx    = ishift + bs2*block + j + bs*point;
1233         a->a[idx] = aa[nzcountb++];
1234       }
1235     }
1236     /* zero out the mask elements we set */
1237     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1238   }
1239   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1240 
1241   PetscFree(rowlengths);
1242   PetscFree(browlengths);
1243   PetscFree(aa);
1244   PetscFree(jj);
1245   PetscFree(mask);
1246 
1247   B->assembled = PETSC_TRUE;
1248 
1249   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1250   if (flg) {
1251     Viewer tviewer;
1252     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1253     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
1254     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1255     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1256   }
1257   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1258   if (flg) {
1259     Viewer tviewer;
1260     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1261     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
1262     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1263     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1264   }
1265   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1266   if (flg) {
1267     Viewer tviewer;
1268     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1269     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1270     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1271   }
1272   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1273   if (flg) {
1274     Viewer tviewer;
1275     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1276     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
1277     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1278     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1279   }
1280   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1281   if (flg) {
1282     Viewer tviewer;
1283     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
1284     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
1285     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
1286     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1287   }
1288   return 0;
1289 }
1290 
1291 
1292 
1293