xref: /petsc/src/mat/impls/baij/seq/baij.c (revision af6b99e9b189e8e8dab39012dffe0bbbf03b23b9)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.25 1996/04/04 00:27:51 balay Exp balay $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = a->indexshift;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45     CHKERRQ(ierr);
46     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
47     PetscFree(ia); PetscFree(ja);
48   } else {
49     if (ishift == oshift) {
50       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51     }
52     else if (ishift == -1) {
53       /* temporarily subtract 1 from i and j indices */
54       int nz = a->i[a->n] - 1;
55       for ( i=0; i<nz; i++ ) a->j[i]--;
56       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58       for ( i=0; i<nz; i++ ) a->j[i]++;
59       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60     } else {
61       /* temporarily add 1 to i and j indices */
62       int nz = a->i[a->n] - 1;
63       for ( i=0; i<nz; i++ ) a->j[i]++;
64       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66       for ( i=0; i<nz; i++ ) a->j[i]--;
67       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68     }
69   }
70   return 0;
71 }
72 
73 /*
74      Adds diagonal pointers to sparse matrix structure.
75 */
76 
77 int MatMarkDiag_SeqBAIJ(Mat A)
78 {
79   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80   int         i,j, *diag, m = a->mbs;
81 
82   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83   PLogObjectMemory(A,(m+1)*sizeof(int));
84   for ( i=0; i<m; i++ ) {
85     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86       if (a->j[j] == i) {
87         diag[i] = j;
88         break;
89       }
90     }
91   }
92   a->diag = diag;
93   return 0;
94 }
95 
96 #include "draw.h"
97 #include "pinclude/pviewer.h"
98 #include "sys.h"
99 
100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
101 {
102   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
103   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l;
104   Scalar      *aa;
105 
106   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
107   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
108   col_lens[0] = MAT_COOKIE;
109   col_lens[1] = a->m;
110   col_lens[2] = a->n;
111   col_lens[3] = a->nz*bs*bs;
112 
113   /* store lengths of each row and write (including header) to file */
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118     }
119   }
120   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
121   PetscFree(col_lens);
122 
123   /* store column indices (zero start index) */
124   jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj);
125   count = 0;
126   for ( i=0; i<a->mbs; i++ ) {
127     for ( j=0; j<bs; j++ ) {
128       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129         for ( l=0; l<bs; l++ ) {
130           jj[count++] = bs*a->j[k] + l;
131         }
132       }
133     }
134   }
135   ierr = PetscBinaryWrite(fd,jj,bs*bs*a->nz,BINARY_INT,0); CHKERRQ(ierr);
136   PetscFree(jj);
137 
138   /* store nonzero values */
139   aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa);
140   count = 0;
141   for ( i=0; i<a->mbs; i++ ) {
142     for ( j=0; j<bs; j++ ) {
143       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144         for ( l=0; l<bs; l++ ) {
145           aa[count++] = a->a[bs*bs*k + l*bs + j];
146         }
147       }
148     }
149   }
150   ierr = PetscBinaryWrite(fd,aa,bs*bs*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
151   PetscFree(aa);
152   return 0;
153 }
154 
155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
156 {
157   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
158   int         ierr, i,j,format,bs = a->bs,k,l;
159   FILE        *fd;
160   char        *outputname;
161 
162   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
163   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
164   ierr = ViewerGetFormat(viewer,&format);
165   if (format == ASCII_FORMAT_INFO) {
166     /* no need to print additional information */ ;
167   }
168   else if (format == ASCII_FORMAT_MATLAB) {
169     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
170   }
171   else {
172     for ( i=0; i<a->mbs; i++ ) {
173       for ( j=0; j<bs; j++ ) {
174         fprintf(fd,"row %d:",i*bs+j);
175         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
176           for ( l=0; l<bs; l++ ) {
177 #if defined(PETSC_COMPLEX)
178           if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) {
179             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
180               real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j]));
181           }
182           else {
183             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j]));
184           }
185 #else
186             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]);
187 #endif
188           }
189         }
190         fprintf(fd,"\n");
191       }
192     }
193   }
194   fflush(fd);
195   return 0;
196 }
197 
198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
199 {
200   Mat         A = (Mat) obj;
201   ViewerType  vtype;
202   int         ierr;
203 
204   if (!viewer) {
205     viewer = STDOUT_VIEWER_SELF;
206   }
207 
208   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
209   if (vtype == MATLAB_VIEWER) {
210     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
211   }
212   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
213     return MatView_SeqBAIJ_ASCII(A,viewer);
214   }
215   else if (vtype == BINARY_FILE_VIEWER) {
216     return MatView_SeqBAIJ_Binary(A,viewer);
217   }
218   else if (vtype == DRAW_VIEWER) {
219     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
220   }
221   return 0;
222 }
223 
224 #define CHUNKSIZE  2
225 
226 /* This version has row oriented v  */
227 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
228 {
229   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
230   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
231   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
232   int         *aj=a->j,nonew=a->nonew,shift=a->indexshift,bs=a->bs,brow,bcol;
233   int          ridx,cidx;
234   Scalar      *ap,value,*aa=a->a,*bap;
235 
236   for ( k=0; k<m; k++ ) { /* loop over added rows */
237     row  = im[k]; brow = row/bs;
238     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
239     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
240     rp   = aj + ai[brow] + shift; ap = aa + bs*bs*ai[brow] + shift;
241     rmax = imax[brow]; nrow = ailen[brow];
242     low = 0;
243     for ( l=0; l<n; l++ ) { /* loop over added columns */
244       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
245       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
246       col = in[l] - shift; bcol = col/bs;
247       ridx = row % bs; cidx = col % bs;
248       if (roworiented) {
249         value = *v++;
250       }
251       else {
252         value = v[k + l*m];
253       }
254       if (!sorted) low = 0; high = nrow;
255       while (high-low > 5) {
256         t = (low+high)/2;
257         if (rp[t] > bcol) high = t;
258         else             low  = t;
259       }
260       for ( i=low; i<high; i++ ) {
261         if (rp[i] > bcol) break;
262         if (rp[i] == bcol) {
263           bap  = ap +  bs*bs*i + bs*ridx + cidx;
264           if (is == ADD_VALUES) *bap += value;
265           else                  *bap  = value;
266           goto noinsert;
267         }
268       }
269       if (nonew) goto noinsert;
270       if (nrow >= rmax) {
271         /* there is no extra room in row, therefore enlarge */
272         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
273         Scalar *new_a;
274 
275         /* malloc new storage space */
276         len     = new_nz*(sizeof(int)+bs*bs*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
277         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
278         new_j   = (int *) (new_a + bs*bs*new_nz);
279         new_i   = new_j + new_nz;
280 
281         /* copy over old data into new slots */
282         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
283         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
284         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));
285         len = (new_nz - CHUNKSIZE - ai[brow] - nrow - shift);
286         PetscMemcpy(new_j+ai[brow]+shift+nrow+CHUNKSIZE,aj+ai[brow]+shift+nrow,
287                                                            len*sizeof(int));
288         PetscMemcpy(new_a,aa,(ai[brow]+nrow+shift)*bs*bs*sizeof(Scalar));
289         PetscMemzero(new_a+bs*bs*(ai[brow]+shift+nrow),bs*bs*CHUNKSIZE);
290         PetscMemcpy(new_a+bs*bs*(ai[brow]+shift+nrow+CHUNKSIZE),
291                     aa+bs*bs*(ai[brow]+shift+nrow),bs*bs*len*sizeof(Scalar));
292         /* free up old matrix storage */
293         PetscFree(a->a);
294         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
295         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
296         a->singlemalloc = 1;
297 
298         rp   = aj + ai[brow] + shift; ap = aa + ai[brow] + shift;
299         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
300         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs*bs*sizeof(Scalar)));
301         a->maxnz += bs*bs*CHUNKSIZE;
302         a->reallocs++;
303       }
304       N = nrow++ - 1; a->nz++;
305       /* shift up all the later entries in this row */
306       for ( ii=N; ii>=i; ii-- ) {
307         rp[ii+1] = rp[ii];
308         PetscMemcpy(ap+bs*bs*(ii+1),ap+bs*bs*(ii),bs*bs);
309       }
310       rp[i] = bcol;
311       ap[bs*bs*i + bs*ridx + cidx] = value;
312       noinsert:;
313       low = i;
314     }
315     ailen[brow] = nrow;
316   }
317   return 0;
318 }
319 
320 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
321 {
322   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
323   *m = a->m; *n = a->n;
324   return 0;
325 }
326 
327 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
328 {
329   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
330   *m = 0; *n = a->m;
331   return 0;
332 }
333 
334 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
335 {
336   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
337   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i;
338   Scalar      *aa,*v_i,*aa_i;
339 
340   bs = a->bs;
341   ai = a->i;
342   aj = a->j;
343   aa = a->a;
344 
345   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
346 
347   bn  = row/bs;   /* Block number */
348   bp  = row % bs; /* Block Position */
349   M   = ai[bn+1] - ai[bn];
350   *nz = bs*M;
351 
352   if (v) {
353     *v = 0;
354     if (*nz) {
355       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
356       for ( i=0; i<M; i++ ) { /* for each block in the block row */
357         v_i  = *v + i*bs;
358         aa_i = aa + bs*bs*(ai[bn] + i);
359         for ( j=bp,k=0; j<bs*bs; j+=bs,k++ ) {v_i[k] = aa_i[j];}
360       }
361     }
362   }
363 
364   if (idx) {
365     *idx = 0;
366     if (*nz) {
367       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
368       for ( i=0; i<M; i++ ) { /* for each block in the block row */
369         idx_i = *idx + i*bs;
370         itmp  = bs*aj[ai[bn] + i];
371         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
372       }
373     }
374   }
375   return 0;
376 }
377 
378 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
379 {
380   if (idx) {if (*idx) PetscFree(*idx);}
381   if (v)   {if (*v)   PetscFree(*v);}
382   return 0;
383 }
384 
385 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
386 {
387   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
388   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
389   int        m = a->m, *ip, N, *ailen = a->ilen,shift = a->indexshift;
390   int        bs = a->bs,  mbs = a->mbs, bs2 = bs*bs;
391   Scalar     *aa = a->a, *ap;
392 
393   if (mode == FLUSH_ASSEMBLY) return 0;
394 
395   for ( i=1; i<mbs; i++ ) {
396     /* move each row back by the amount of empty slots (fshift) before it*/
397     fshift += imax[i-1] - ailen[i-1];
398     if (fshift) {
399       ip = aj + ai[i] + shift; ap = aa + bs2*ai[i] + shift;
400       N = ailen[i];
401       for ( j=0; j<N; j++ ) {
402         ip[j-fshift] = ip[j];
403         PetscMemcpy(ap+(j-fshift)*bs2,ap+j+bs2,bs2);
404       }
405     }
406     ai[i] = ai[i-1] + ailen[i-1];
407   }
408   if (mbs) {
409     fshift += imax[mbs-1] - ailen[mbs-1];
410     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
411   }
412   /* reset ilen and imax for each row */
413   for ( i=0; i<mbs; i++ ) {
414     ailen[i] = imax[i] = ai[i+1] - ai[i];
415   }
416   a->nz = (ai[m] + shift)*bs2;
417 
418   /* diagonals may have moved, so kill the diagonal pointers */
419   if (fshift && a->diag) {
420     PetscFree(a->diag);
421     PLogObjectMemory(A,-(m+1)*sizeof(int));
422     a->diag = 0;
423   }
424   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneeded storage space %d used %d rows %d\n",
425            fshift,a->nz,m);
426   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n",
427            a->reallocs);
428   return 0;
429 }
430 
431 static int MatZeroEntries_SeqBAIJ(Mat A)
432 {
433   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
434   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
435   return 0;
436 }
437 
438 int MatDestroy_SeqBAIJ(PetscObject obj)
439 {
440   Mat         A  = (Mat) obj;
441   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
442 
443 #if defined(PETSC_LOG)
444   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
445 #endif
446   PetscFree(a->a);
447   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
448   if (a->diag) PetscFree(a->diag);
449   if (a->ilen) PetscFree(a->ilen);
450   if (a->imax) PetscFree(a->imax);
451   if (a->solve_work) PetscFree(a->solve_work);
452   if (a->mult_work) PetscFree(a->mult_work);
453   PetscFree(a);
454   PLogObjectDestroy(A);
455   PetscHeaderDestroy(A);
456   return 0;
457 }
458 
459 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
460 {
461   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
462   if      (op == ROW_ORIENTED)              a->roworiented = 1;
463   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
464   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
465   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
466   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
467   else if (op == ROWS_SORTED ||
468            op == SYMMETRIC_MATRIX ||
469            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
470            op == YES_NEW_DIAGONALS)
471     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
472   else if (op == NO_NEW_DIAGONALS)
473     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
474   else
475     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
476   return 0;
477 }
478 
479 
480 /* -------------------------------------------------------*/
481 /* Should check that shapes of vectors and matrices match */
482 /* -------------------------------------------------------*/
483 #include "pinclude/plapack.h"
484 
485 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
486 {
487   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
488   Scalar          *xg,*yg;
489   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
490   register Scalar x1,x2,x3,x4,x5;
491   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
492   int             bs = a->bs,j,n,bs2 = bs*bs;
493 
494   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
495   PetscMemzero(y,m*sizeof(Scalar));
496   x     = x;
497   idx   = a->j;
498   v     = a->a;
499   ii    = a->i;
500 
501   switch (bs) {
502     case 1:
503      for ( i=0; i<m; i++ ) {
504        n    = ii[1] - ii[0]; ii++;
505        sum  = 0.0;
506        while (n--) sum += *v++ * x[*idx++];
507        y[i] = sum;
508       }
509       break;
510     case 2:
511       for ( i=0; i<mbs; i++ ) {
512         n  = ii[1] - ii[0]; ii++;
513         sum1 = 0.0; sum2 = 0.0;
514         for ( j=0; j<n; j++ ) {
515           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
516           sum1 += v[0]*x1 + v[2]*x2;
517           sum2 += v[1]*x1 + v[3]*x2;
518           v += 4;
519         }
520         y[0] += sum1; y[1] += sum2;
521         y += 2;
522       }
523       break;
524     case 3:
525       for ( i=0; i<mbs; i++ ) {
526         n  = ii[1] - ii[0]; ii++;
527         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
528         for ( j=0; j<n; j++ ) {
529           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
530           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
531           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
532           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
533           v += 9;
534         }
535         y[0] += sum1; y[1] += sum2; y[2] += sum3;
536         y += 3;
537       }
538       break;
539     case 4:
540       for ( i=0; i<mbs; i++ ) {
541         n  = ii[1] - ii[0]; ii++;
542         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
543         for ( j=0; j<n; j++ ) {
544           xb = x + 4*(*idx++);
545           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
546           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
547           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
548           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
549           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
550           v += 16;
551         }
552         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
553         y += 4;
554       }
555       break;
556     case 5:
557       for ( i=0; i<mbs; i++ ) {
558         n  = ii[1] - ii[0]; ii++;
559         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
560         for ( j=0; j<n; j++ ) {
561           xb = x + 5*(*idx++);
562           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
563           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
564           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
565           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
566           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
567           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
568           v += 25;
569         }
570         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
571         y += 5;
572       }
573       break;
574       /* block sizes larger then 5 by 5 are handled by BLAS */
575     default: {
576       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
577       if (!a->mult_work) {
578         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
579         CHKPTRQ(a->mult_work);
580       }
581       work = a->mult_work;
582       for ( i=0; i<mbs; i++ ) {
583         n     = ii[1] - ii[0]; ii++;
584         ncols = n*bs;
585         workt = work;
586         for ( j=0; j<n; j++ ) {
587           xb = x + bs*(*idx++);
588           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
589           workt += bs;
590         }
591         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
592         v += n*bs2;
593         y += bs;
594       }
595     }
596   }
597   PLogFlops(2*bs2*a->nz - m);
598   return 0;
599 }
600 
601 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
602 {
603   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
604   if (nz)  *nz  = a->bs*a->bs*a->nz;
605   if (nza) *nza = a->maxnz;
606   if (mem) *mem = (int)A->mem;
607   return 0;
608 }
609 
610 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
611 {
612   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
613 
614   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
615 
616   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
617   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
618       (a->nz != b->nz)||(a->indexshift != b->indexshift)) {
619     *flg = PETSC_FALSE; return 0;
620   }
621 
622   /* if the a->i are the same */
623   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
624     *flg = PETSC_FALSE; return 0;
625   }
626 
627   /* if a->j are the same */
628   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
629     *flg = PETSC_FALSE; return 0;
630   }
631 
632   /* if a->a are the same */
633   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
634     *flg = PETSC_FALSE; return 0;
635   }
636   *flg = PETSC_TRUE;
637   return 0;
638 
639 }
640 
641 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
642 {
643   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
644   int         i,j,k,n,row,bs,*ai,*aj,ambs;
645   Scalar      *x, zero = 0.0,*aa,*aa_j;
646 
647   bs  = a->bs;
648   aa   = a->a;
649   ai   = a->i;
650   aj   = a->j;
651   ambs = a->mbs;
652 
653   VecSet(&zero,v);
654   VecGetArray(v,&x); VecGetLocalSize(v,&n);
655   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
656   for ( i=0; i<ambs; i++ ) {
657     for ( j=ai[i]; j<ai[i+1]; j++ ) {
658       if (aj[j] == i) {
659         row  = i*bs;
660         aa_j = aa+j*bs*bs;
661         for (k=0; k<bs*bs; k+=(bs+1),row++) x[row] = aa_j[k];
662         break;
663       }
664     }
665   }
666   return 0;
667 }
668 
669 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
670 {
671   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
672   Scalar      *l,*r,x,*v,*aa,*li,*ri;
673   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs;
674 
675   ai  = a->i;
676   aj  = a->j;
677   aa  = a->a;
678   m   = a->m;
679   n   = a->n;
680   bs  = a->bs;
681   mbs = a->mbs;
682 
683   if (ll) {
684     VecGetArray(ll,&l); VecGetSize(ll,&lm);
685     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
686     for ( i=0; i<mbs; i++ ) { /* for each block row */
687       M  = ai[i+1] - ai[i];
688       li = l + i*bs;
689       v  = aa + bs*bs*ai[i];
690       for ( j=0; j<M; j++ ) { /* for each block */
691         for ( k=0; k<bs*bs; k++ ) {
692           (*v++) *= li[k%bs];
693         }
694       }
695     }
696   }
697 
698   if (rr) {
699     VecGetArray(rr,&r); VecGetSize(rr,&rn);
700     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
701     for ( i=0; i<mbs; i++ ) { /* for each block row */
702       M  = ai[i+1] - ai[i];
703       v  = aa + bs*bs*ai[i];
704       for ( j=0; j<M; j++ ) { /* for each block */
705         ri = r + bs*aj[ai[i]+j];
706         for ( k=0; k<bs; k++ ) {
707           x = ri[k];
708           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
709         }
710       }
711     }
712   }
713   return 0;
714 }
715 
716 
717 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
718 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
719 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
720 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
721 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
722 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
723 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
724 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
725 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
726 
727 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
728 {
729   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
730   Scalar      *v = a->a;
731   double      sum = 0.0;
732   int         i;
733 
734   if (type == NORM_FROBENIUS) {
735     for (i=0; i<a->nz; i++ ) {
736 #if defined(PETSC_COMPLEX)
737       sum += real(conj(*v)*(*v)); v++;
738 #else
739       sum += (*v)*(*v); v++;
740 #endif
741     }
742     *norm = sqrt(sum);
743   }
744   else {
745     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
746   }
747   return 0;
748 }
749 
750 /*
751      note: This can only work for identity for row and col. It would
752    be good to check this and otherwise generate an error.
753 */
754 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
755 {
756   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
757   Mat         outA;
758   int         ierr;
759 
760   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
761 
762   outA          = inA;
763   inA->factor   = FACTOR_LU;
764   a->row        = row;
765   a->col        = col;
766 
767   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
768 
769   if (!a->diag) {
770     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
771   }
772   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
773   return 0;
774 }
775 
776 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
777 {
778   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
779   int         one = 1, totalnz = a->bs*a->bs*a->nz;
780   BLscal_( &totalnz, alpha, a->a, &one );
781   PLogFlops(totalnz);
782   return 0;
783 }
784 
785 int MatPrintHelp_SeqBAIJ(Mat A)
786 {
787   static int called = 0;
788 
789   if (called) return 0; else called = 1;
790   return 0;
791 }
792 /* -------------------------------------------------------------------*/
793 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
794        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
795        MatMult_SeqBAIJ,0,
796        0,0,
797        MatSolve_SeqBAIJ,0,
798        0,0,
799        MatLUFactor_SeqBAIJ,0,
800        0,
801        0,
802        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
803        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
804        0,MatAssemblyEnd_SeqBAIJ,
805        0,
806        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
807        MatGetReordering_SeqBAIJ,
808        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
809        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
810        MatILUFactorSymbolic_SeqBAIJ,0,
811        0,0,/*  MatConvert_SeqBAIJ  */ 0,
812        0,0,
813        MatConvertSameType_SeqBAIJ,0,0,
814        MatILUFactor_SeqBAIJ,0,0,
815        0,0,
816        0,0,
817        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
818        0};
819 
820 /*@C
821    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
822    (the default parallel PETSc format).  For good matrix assembly performance
823    the user should preallocate the matrix storage by setting the parameter nz
824    (or nzz).  By setting these parameters accurately, performance can be
825    increased by more than a factor of 50.
826 
827    Input Parameters:
828 .  comm - MPI communicator, set to MPI_COMM_SELF
829 .  bs - size of block
830 .  m - number of rows
831 .  n - number of columns
832 .  nz - number of block nonzeros per block row (same for all rows)
833 .  nzz - number of block nonzeros per block row or PETSC_NULL
834          (possibly different for each row)
835 
836    Output Parameter:
837 .  A - the matrix
838 
839    Notes:
840    The BAIJ format, is fully compatible with standard Fortran 77
841    storage.  That is, the stored row and column indices can begin at
842    either one (as in Fortran) or zero.  See the users manual for details.
843 
844    Specify the preallocated storage with either nz or nnz (not both).
845    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
846    allocation.  For additional details, see the users manual chapter on
847    matrices and the file $(PETSC_DIR)/Performance.
848 
849 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
850 @*/
851 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
852 {
853   Mat         B;
854   Mat_SeqBAIJ *b;
855   int         i,len,ierr, flg,mbs = m/bs;
856 
857   if (mbs*bs != m)
858     SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize");
859 
860   *A      = 0;
861   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
862   PLogObjectCreate(B);
863   B->data             = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
864   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
865   switch (bs) {
866     case 1:
867       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
868       break;
869     case 2:
870       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
871       break;
872     case 3:
873       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
874       break;
875     case 4:
876       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
877       break;
878     case 5:
879       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
880       break;
881   }
882   B->destroy          = MatDestroy_SeqBAIJ;
883   B->view             = MatView_SeqBAIJ;
884   B->factor           = 0;
885   B->lupivotthreshold = 1.0;
886   b->row              = 0;
887   b->col              = 0;
888   b->reallocs         = 0;
889 
890   b->m       = m;
891   b->mbs     = mbs;
892   b->n       = n;
893   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
894   if (nnz == PETSC_NULL) {
895     if (nz == PETSC_DEFAULT) nz = 5;
896     else if (nz <= 0)        nz = 1;
897     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
898     nz = nz*mbs;
899   }
900   else {
901     nz = 0;
902     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
903   }
904 
905   /* allocate the matrix space */
906   len   = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int);
907   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
908   PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar));
909   b->j  = (int *) (b->a + nz*bs*bs);
910   PetscMemzero(b->j,nz*sizeof(int));
911   b->i  = b->j + nz;
912   b->singlemalloc = 1;
913 
914   b->i[0] = 0;
915   for (i=1; i<mbs+1; i++) {
916     b->i[i] = b->i[i-1] + b->imax[i-1];
917   }
918 
919   /* b->ilen will count nonzeros in each block row so far. */
920   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
921   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
922   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
923 
924   b->bs               = bs;
925   b->mbs              = mbs;
926   b->nz               = 0;
927   b->maxnz            = nz;
928   b->sorted           = 0;
929   b->roworiented      = 1;
930   b->nonew            = 0;
931   b->diag             = 0;
932   b->solve_work       = 0;
933   b->mult_work        = 0;
934   b->spptr            = 0;
935 
936   *A = B;
937   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
938   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
939   return 0;
940 }
941 
942 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
943 {
944   Mat         C;
945   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
946   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz;
947 
948   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
949 
950   *B = 0;
951   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
952   PLogObjectCreate(C);
953   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
954   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
955   C->destroy    = MatDestroy_SeqBAIJ;
956   C->view       = MatView_SeqBAIJ;
957   C->factor     = A->factor;
958   c->row        = 0;
959   c->col        = 0;
960   C->assembled  = PETSC_TRUE;
961 
962   c->m          = a->m;
963   c->n          = a->n;
964   c->bs         = a->bs;
965   c->mbs        = a->mbs;
966   c->nbs        = a->nbs;
967 
968   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
969   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
970   for ( i=0; i<mbs; i++ ) {
971     c->imax[i] = a->imax[i];
972     c->ilen[i] = a->ilen[i];
973   }
974 
975   /* allocate the matrix space */
976   c->singlemalloc = 1;
977   len   = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int));
978   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
979   c->j  = (int *) (c->a + nz*bs*bs);
980   c->i  = c->j + nz;
981   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
982   if (mbs > 0) {
983     PetscMemcpy(c->j,a->j,nz*sizeof(int));
984     if (cpvalues == COPY_VALUES) {
985       PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar));
986     }
987   }
988 
989   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
990   c->sorted      = a->sorted;
991   c->roworiented = a->roworiented;
992   c->nonew       = a->nonew;
993 
994   if (a->diag) {
995     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
996     PLogObjectMemory(C,(mbs+1)*sizeof(int));
997     for ( i=0; i<mbs; i++ ) {
998       c->diag[i] = a->diag[i];
999     }
1000   }
1001   else c->diag          = 0;
1002   c->nz                 = a->nz;
1003   c->maxnz              = a->maxnz;
1004   c->solve_work         = 0;
1005   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1006   c->mult_work          = 0;
1007   *B = C;
1008   return 0;
1009 }
1010 
1011 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1012 {
1013   Mat_SeqBAIJ  *a;
1014   Mat          B;
1015   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1016   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1017   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1018   int          *masked, nmask,tmp,ishift,bs2;
1019   Scalar       *aa;
1020   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1021 
1022   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1023   bs2  = bs*bs;
1024 
1025   MPI_Comm_size(comm,&size);
1026   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1027   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1028   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1029   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1030   M = header[1]; N = header[2]; nz = header[3];
1031 
1032   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1033 
1034   /*
1035      This code adds extra rows to make sure the number of rows is
1036     divisible by the blocksize
1037   */
1038   mbs        = M/bs;
1039   extra_rows = bs - M + bs*(mbs);
1040   if (extra_rows == bs) extra_rows = 0;
1041   else                  mbs++;
1042   if (extra_rows) {
1043     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
1044   }
1045 
1046   /* read in row lengths */
1047   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1048   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1049   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1050 
1051   /* read in column indices */
1052   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1053   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1054   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1055 
1056   /* loop over row lengths determining block row lengths */
1057   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1058   PetscMemzero(browlengths,mbs*sizeof(int));
1059   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1060   PetscMemzero(mask,mbs*sizeof(int));
1061   masked = mask + mbs;
1062   rowcount = 0; nzcount = 0;
1063   for ( i=0; i<mbs; i++ ) {
1064     nmask = 0;
1065     for ( j=0; j<bs; j++ ) {
1066       kmax = rowlengths[rowcount];
1067       for ( k=0; k<kmax; k++ ) {
1068         tmp = jj[nzcount++]/bs;
1069         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1070       }
1071       rowcount++;
1072     }
1073     browlengths[i] += nmask;
1074     /* zero out the mask elements we set */
1075     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1076   }
1077 
1078   /* create our matrix */
1079   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1080          CHKERRQ(ierr);
1081   B = *A;
1082   a = (Mat_SeqBAIJ *) B->data;
1083 
1084   /* set matrix "i" values */
1085   a->i[0] = 0;
1086   for ( i=1; i<= mbs; i++ ) {
1087     a->i[i]      = a->i[i-1] + browlengths[i-1];
1088     a->ilen[i-1] = browlengths[i-1];
1089   }
1090   a->indexshift = 0;
1091   a->nz         = 0;
1092   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1093 
1094   /* read in nonzero values */
1095   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1096   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
1097   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1098 
1099   /* set "a" and "j" values into matrix */
1100   nzcount = 0; jcount = 0;
1101   for ( i=0; i<mbs; i++ ) {
1102     nzcountb = nzcount;
1103     nmask    = 0;
1104     for ( j=0; j<bs; j++ ) {
1105       kmax = rowlengths[i*bs+j];
1106       for ( k=0; k<kmax; k++ ) {
1107         tmp = jj[nzcount++]/bs;
1108 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1109       }
1110       rowcount++;
1111     }
1112     /* sort the masked values */
1113     PetscSortInt(nmask,masked);
1114 
1115     /* set "j" values into matrix */
1116     maskcount = 1;
1117     for ( j=0; j<nmask; j++ ) {
1118       a->j[jcount++]  = masked[j];
1119       mask[masked[j]] = maskcount++;
1120     }
1121     /* set "a" values into matrix */
1122     ishift = bs2*a->i[i];
1123     for ( j=0; j<bs; j++ ) {
1124       kmax = rowlengths[i*bs+j];
1125       for ( k=0; k<kmax; k++ ) {
1126         tmp    = jj[nzcountb]/bs ;
1127         block  = mask[tmp] - 1;
1128         point  = jj[nzcountb] - bs*tmp;
1129         idx    = ishift + bs2*block + j + bs*point;
1130         a->a[idx] = aa[nzcountb++];
1131       }
1132     }
1133     /* zero out the mask elements we set */
1134     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1135   }
1136   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1137 
1138   PetscFree(rowlengths);
1139   PetscFree(browlengths);
1140   PetscFree(aa);
1141   PetscFree(jj);
1142   PetscFree(mask);
1143 
1144   B->assembled = PETSC_TRUE;
1145 
1146   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1147   if (flg) {
1148     Viewer tviewer;
1149     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1150     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
1151     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1152     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1153   }
1154   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1155   if (flg) {
1156     Viewer tviewer;
1157     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1158     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
1159     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1160     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1161   }
1162   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1163   if (flg) {
1164     Viewer tviewer;
1165     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1166     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1167     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1168   }
1169   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1170   if (flg) {
1171     Viewer tviewer;
1172     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1173     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
1174     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1175     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1176   }
1177   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1178   if (flg) {
1179     Viewer tviewer;
1180     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
1181     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
1182     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
1183     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1184   }
1185   return 0;
1186 }
1187 
1188 
1189 
1190