xref: /petsc/src/mat/impls/baij/seq/baij.c (revision dfa27b742cf2ba09cbb22bc417bb67c000ed546d)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.26 1996/04/04 04:07:02 balay Exp balay $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = a->indexshift;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45     CHKERRQ(ierr);
46     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
47     PetscFree(ia); PetscFree(ja);
48   } else {
49     if (ishift == oshift) {
50       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51     }
52     else if (ishift == -1) {
53       /* temporarily subtract 1 from i and j indices */
54       int nz = a->i[a->n] - 1;
55       for ( i=0; i<nz; i++ ) a->j[i]--;
56       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58       for ( i=0; i<nz; i++ ) a->j[i]++;
59       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60     } else {
61       /* temporarily add 1 to i and j indices */
62       int nz = a->i[a->n] - 1;
63       for ( i=0; i<nz; i++ ) a->j[i]++;
64       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66       for ( i=0; i<nz; i++ ) a->j[i]--;
67       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68     }
69   }
70   return 0;
71 }
72 
73 /*
74      Adds diagonal pointers to sparse matrix structure.
75 */
76 
77 int MatMarkDiag_SeqBAIJ(Mat A)
78 {
79   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80   int         i,j, *diag, m = a->mbs;
81 
82   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83   PLogObjectMemory(A,(m+1)*sizeof(int));
84   for ( i=0; i<m; i++ ) {
85     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86       if (a->j[j] == i) {
87         diag[i] = j;
88         break;
89       }
90     }
91   }
92   a->diag = diag;
93   return 0;
94 }
95 
96 #include "draw.h"
97 #include "pinclude/pviewer.h"
98 #include "sys.h"
99 
100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
101 {
102   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
103   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=bs*bs;
104   Scalar      *aa;
105 
106   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
107   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
108   col_lens[0] = MAT_COOKIE;
109   col_lens[1] = a->m;
110   col_lens[2] = a->n;
111   col_lens[3] = a->nz*bs2;
112 
113   /* store lengths of each row and write (including header) to file */
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118     }
119   }
120   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
121   PetscFree(col_lens);
122 
123   /* store column indices (zero start index) */
124   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
125   count = 0;
126   for ( i=0; i<a->mbs; i++ ) {
127     for ( j=0; j<bs; j++ ) {
128       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129         for ( l=0; l<bs; l++ ) {
130           jj[count++] = bs*a->j[k] + l;
131         }
132       }
133     }
134   }
135   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
136   PetscFree(jj);
137 
138   /* store nonzero values */
139   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
140   count = 0;
141   for ( i=0; i<a->mbs; i++ ) {
142     for ( j=0; j<bs; j++ ) {
143       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144         for ( l=0; l<bs; l++ ) {
145           aa[count++] = a->a[bs2*k + l*bs + j];
146         }
147       }
148     }
149   }
150   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
151   PetscFree(aa);
152   return 0;
153 }
154 
155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
156 {
157   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
158   int         ierr, i,j,format,bs = a->bs,k,l,bs2=bs*bs;
159   FILE        *fd;
160   char        *outputname;
161 
162   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
163   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
164   ierr = ViewerGetFormat(viewer,&format);
165   if (format == ASCII_FORMAT_INFO) {
166     /* no need to print additional information */ ;
167   }
168   else if (format == ASCII_FORMAT_MATLAB) {
169     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
170   }
171   else {
172     for ( i=0; i<a->mbs; i++ ) {
173       for ( j=0; j<bs; j++ ) {
174         fprintf(fd,"row %d:",i*bs+j);
175         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
176           for ( l=0; l<bs; l++ ) {
177 #if defined(PETSC_COMPLEX)
178           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
179             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
180               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
181           }
182           else {
183             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
184           }
185 #else
186             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
187 #endif
188           }
189         }
190         fprintf(fd,"\n");
191       }
192     }
193   }
194   fflush(fd);
195   return 0;
196 }
197 
198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
199 {
200   Mat         A = (Mat) obj;
201   ViewerType  vtype;
202   int         ierr;
203 
204   if (!viewer) {
205     viewer = STDOUT_VIEWER_SELF;
206   }
207 
208   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
209   if (vtype == MATLAB_VIEWER) {
210     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
211   }
212   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
213     return MatView_SeqBAIJ_ASCII(A,viewer);
214   }
215   else if (vtype == BINARY_FILE_VIEWER) {
216     return MatView_SeqBAIJ_Binary(A,viewer);
217   }
218   else if (vtype == DRAW_VIEWER) {
219     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
220   }
221   return 0;
222 }
223 
224 #define CHUNKSIZE  1
225 
226 /* This version has row oriented v  */
227 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
228 {
229   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
230   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
231   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
232   int         *aj=a->j,nonew=a->nonew,shift=a->indexshift,bs=a->bs,brow,bcol;
233   int          ridx,cidx,bs2=bs*bs;
234   Scalar      *ap,value,*aa=a->a,*bap;
235 
236   for ( k=0; k<m; k++ ) { /* loop over added rows */
237     row  = im[k]; brow = row/bs;
238     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
239     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
240     rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
241     rmax = imax[brow]; nrow = ailen[brow];
242     low = 0;
243     for ( l=0; l<n; l++ ) { /* loop over added columns */
244       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
245       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
246       col = in[l] - shift; bcol = col/bs;
247       ridx = row % bs; cidx = col % bs;
248       if (roworiented) {
249         value = *v++;
250       }
251       else {
252         value = v[k + l*m];
253       }
254       if (!sorted) low = 0; high = nrow;
255       while (high-low > 5) {
256         t = (low+high)/2;
257         if (rp[t] > bcol) high = t;
258         else             low  = t;
259       }
260       for ( i=low; i<high; i++ ) {
261         if (rp[i] > bcol) break;
262         if (rp[i] == bcol) {
263           bap  = ap +  bs2*i + bs*cidx + ridx;
264           if (is == ADD_VALUES) *bap += value;
265           else                  *bap  = value;
266           goto noinsert;
267         }
268       }
269       if (nonew) goto noinsert;
270       if (nrow >= rmax) {
271         /* there is no extra room in row, therefore enlarge */
272         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
273         Scalar *new_a;
274 
275         /* malloc new storage space */
276         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
277         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
278         new_j   = (int *) (new_a + bs2*new_nz);
279         new_i   = new_j + new_nz;
280 
281         /* copy over old data into new slots */
282         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
283         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
284         PetscMemcpy(new_j,aj,(ai[brow]+nrow+shift)*sizeof(int));
285         len = (new_nz - CHUNKSIZE - ai[brow] - nrow - shift);
286         PetscMemcpy(new_j+ai[brow]+shift+nrow+CHUNKSIZE,aj+ai[brow]+shift+nrow,
287                                                            len*sizeof(int));
288         PetscMemcpy(new_a,aa,(ai[brow]+nrow+shift)*bs2*sizeof(Scalar));
289         PetscMemzero(new_a+bs2*(ai[brow]+shift+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
290         PetscMemcpy(new_a+bs2*(ai[brow]+shift+nrow+CHUNKSIZE),
291                     aa+bs2*(ai[brow]+shift+nrow),bs2*len*sizeof(Scalar));
292         /* free up old matrix storage */
293         PetscFree(a->a);
294         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
295         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
296         a->singlemalloc = 1;
297 
298         rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
299         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
300         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
301         a->maxnz += bs2*CHUNKSIZE;
302         a->reallocs++;
303         a->nz+=bs2;
304       }
305       N = nrow++ - 1;
306       /* shift up all the later entries in this row */
307       for ( ii=N; ii>=i; ii-- ) {
308         rp[ii+1] = rp[ii];
309          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
310       }
311       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
312       rp[i] = bcol;
313       ap[bs2*i + bs*cidx + ridx] = value;
314       noinsert:;
315       low = i;
316     }
317     ailen[brow] = nrow;
318   }
319   return 0;
320 }
321 
322 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
323 {
324   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
325   *m = a->m; *n = a->n;
326   return 0;
327 }
328 
329 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
330 {
331   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
332   *m = 0; *n = a->m;
333   return 0;
334 }
335 
336 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
337 {
338   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
339   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
340   Scalar      *aa,*v_i,*aa_i;
341 
342   bs  = a->bs;
343   ai  = a->i;
344   aj  = a->j;
345   aa  = a->a;
346   bs2 = bs*bs;
347 
348   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
349 
350   bn  = row/bs;   /* Block number */
351   bp  = row % bs; /* Block Position */
352   M   = ai[bn+1] - ai[bn];
353   *nz = bs*M;
354 
355   if (v) {
356     *v = 0;
357     if (*nz) {
358       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
359       for ( i=0; i<M; i++ ) { /* for each block in the block row */
360         v_i  = *v + i*bs;
361         aa_i = aa + bs2*(ai[bn] + i);
362         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
363       }
364     }
365   }
366 
367   if (idx) {
368     *idx = 0;
369     if (*nz) {
370       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
371       for ( i=0; i<M; i++ ) { /* for each block in the block row */
372         idx_i = *idx + i*bs;
373         itmp  = bs*aj[ai[bn] + i];
374         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
375       }
376     }
377   }
378   return 0;
379 }
380 
381 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
382 {
383   if (idx) {if (*idx) PetscFree(*idx);}
384   if (v)   {if (*v)   PetscFree(*v);}
385   return 0;
386 }
387 
388 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
389 {
390   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
391   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
392   int        m = a->m, *ip, N, *ailen = a->ilen,shift = a->indexshift;
393   int        bs = a->bs,  mbs = a->mbs, bs2 = bs*bs;
394   Scalar     *aa = a->a, *ap;
395 
396   if (mode == FLUSH_ASSEMBLY) return 0;
397 
398   for ( i=1; i<mbs; i++ ) {
399     /* move each row back by the amount of empty slots (fshift) before it*/
400     fshift += imax[i-1] - ailen[i-1];
401     if (fshift) {
402       ip = aj + ai[i] + shift; ap = aa + bs2*ai[i] + shift;
403       N = ailen[i];
404       for ( j=0; j<N; j++ ) {
405         ip[j-fshift] = ip[j];
406         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
407       }
408     }
409     ai[i] = ai[i-1] + ailen[i-1];
410   }
411   if (mbs) {
412     fshift += imax[mbs-1] - ailen[mbs-1];
413     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
414   }
415   /* reset ilen and imax for each row */
416   for ( i=0; i<mbs; i++ ) {
417     ailen[i] = imax[i] = ai[i+1] - ai[i];
418   }
419   a->nz = (ai[m] + shift)*bs2;
420 
421   /* diagonals may have moved, so kill the diagonal pointers */
422   if (fshift && a->diag) {
423     PetscFree(a->diag);
424     PLogObjectMemory(A,-(m+1)*sizeof(int));
425     a->diag = 0;
426   }
427   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneeded storage space %d used %d rows %d\n",
428            fshift,a->nz,m);
429   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n",
430            a->reallocs);
431   return 0;
432 }
433 
434 static int MatZeroEntries_SeqBAIJ(Mat A)
435 {
436   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
437   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
438   return 0;
439 }
440 
441 int MatDestroy_SeqBAIJ(PetscObject obj)
442 {
443   Mat         A  = (Mat) obj;
444   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
445 
446 #if defined(PETSC_LOG)
447   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
448 #endif
449   PetscFree(a->a);
450   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
451   if (a->diag) PetscFree(a->diag);
452   if (a->ilen) PetscFree(a->ilen);
453   if (a->imax) PetscFree(a->imax);
454   if (a->solve_work) PetscFree(a->solve_work);
455   if (a->mult_work) PetscFree(a->mult_work);
456   PetscFree(a);
457   PLogObjectDestroy(A);
458   PetscHeaderDestroy(A);
459   return 0;
460 }
461 
462 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
463 {
464   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
465   if      (op == ROW_ORIENTED)              a->roworiented = 1;
466   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
467   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
468   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
469   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
470   else if (op == ROWS_SORTED ||
471            op == SYMMETRIC_MATRIX ||
472            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
473            op == YES_NEW_DIAGONALS)
474     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
475   else if (op == NO_NEW_DIAGONALS)
476     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
477   else
478     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
479   return 0;
480 }
481 
482 
483 /* -------------------------------------------------------*/
484 /* Should check that shapes of vectors and matrices match */
485 /* -------------------------------------------------------*/
486 #include "pinclude/plapack.h"
487 
488 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
489 {
490   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
491   Scalar          *xg,*yg;
492   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
493   register Scalar x1,x2,x3,x4,x5;
494   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
495   int             bs = a->bs,j,n,bs2 = bs*bs;
496 
497   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
498   PetscMemzero(y,m*sizeof(Scalar));
499   x     = x;
500   idx   = a->j;
501   v     = a->a;
502   ii    = a->i;
503 
504   switch (bs) {
505     case 1:
506      for ( i=0; i<m; i++ ) {
507        n    = ii[1] - ii[0]; ii++;
508        sum  = 0.0;
509        while (n--) sum += *v++ * x[*idx++];
510        y[i] = sum;
511       }
512       break;
513     case 2:
514       for ( i=0; i<mbs; i++ ) {
515         n  = ii[1] - ii[0]; ii++;
516         sum1 = 0.0; sum2 = 0.0;
517         for ( j=0; j<n; j++ ) {
518           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
519           sum1 += v[0]*x1 + v[2]*x2;
520           sum2 += v[1]*x1 + v[3]*x2;
521           v += 4;
522         }
523         y[0] += sum1; y[1] += sum2;
524         y += 2;
525       }
526       break;
527     case 3:
528       for ( i=0; i<mbs; i++ ) {
529         n  = ii[1] - ii[0]; ii++;
530         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
531         for ( j=0; j<n; j++ ) {
532           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
533           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
534           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
535           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
536           v += 9;
537         }
538         y[0] += sum1; y[1] += sum2; y[2] += sum3;
539         y += 3;
540       }
541       break;
542     case 4:
543       for ( i=0; i<mbs; i++ ) {
544         n  = ii[1] - ii[0]; ii++;
545         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
546         for ( j=0; j<n; j++ ) {
547           xb = x + 4*(*idx++);
548           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
549           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
550           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
551           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
552           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
553           v += 16;
554         }
555         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
556         y += 4;
557       }
558       break;
559     case 5:
560       for ( i=0; i<mbs; i++ ) {
561         n  = ii[1] - ii[0]; ii++;
562         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
563         for ( j=0; j<n; j++ ) {
564           xb = x + 5*(*idx++);
565           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
566           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
567           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
568           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
569           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
570           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
571           v += 25;
572         }
573         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
574         y += 5;
575       }
576       break;
577       /* block sizes larger then 5 by 5 are handled by BLAS */
578     default: {
579       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
580       if (!a->mult_work) {
581         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
582         CHKPTRQ(a->mult_work);
583       }
584       work = a->mult_work;
585       for ( i=0; i<mbs; i++ ) {
586         n     = ii[1] - ii[0]; ii++;
587         ncols = n*bs;
588         workt = work;
589         for ( j=0; j<n; j++ ) {
590           xb = x + bs*(*idx++);
591           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
592           workt += bs;
593         }
594         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
595         v += n*bs2;
596         y += bs;
597       }
598     }
599   }
600   PLogFlops(2*bs2*a->nz - m);
601   return 0;
602 }
603 
604 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
605 {
606   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
607   if (nz)  *nz  = a->bs*a->bs*a->nz;
608   if (nza) *nza = a->maxnz;
609   if (mem) *mem = (int)A->mem;
610   return 0;
611 }
612 
613 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
614 {
615   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
616 
617   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
618 
619   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
620   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
621       (a->nz != b->nz)||(a->indexshift != b->indexshift)) {
622     *flg = PETSC_FALSE; return 0;
623   }
624 
625   /* if the a->i are the same */
626   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
627     *flg = PETSC_FALSE; return 0;
628   }
629 
630   /* if a->j are the same */
631   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
632     *flg = PETSC_FALSE; return 0;
633   }
634 
635   /* if a->a are the same */
636   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
637     *flg = PETSC_FALSE; return 0;
638   }
639   *flg = PETSC_TRUE;
640   return 0;
641 
642 }
643 
644 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
645 {
646   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
647   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
648   Scalar      *x, zero = 0.0,*aa,*aa_j;
649 
650   bs   = a->bs;
651   aa   = a->a;
652   ai   = a->i;
653   aj   = a->j;
654   ambs = a->mbs;
655   bs2  = bs*bs;
656 
657   VecSet(&zero,v);
658   VecGetArray(v,&x); VecGetLocalSize(v,&n);
659   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
660   for ( i=0; i<ambs; i++ ) {
661     for ( j=ai[i]; j<ai[i+1]; j++ ) {
662       if (aj[j] == i) {
663         row  = i*bs;
664         aa_j = aa+j*bs2;
665         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
666         break;
667       }
668     }
669   }
670   return 0;
671 }
672 
673 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
674 {
675   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
676   Scalar      *l,*r,x,*v,*aa,*li,*ri;
677   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
678 
679   ai  = a->i;
680   aj  = a->j;
681   aa  = a->a;
682   m   = a->m;
683   n   = a->n;
684   bs  = a->bs;
685   mbs = a->mbs;
686   bs2 = bs*bs;
687   if (ll) {
688     VecGetArray(ll,&l); VecGetSize(ll,&lm);
689     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
690     for ( i=0; i<mbs; i++ ) { /* for each block row */
691       M  = ai[i+1] - ai[i];
692       li = l + i*bs;
693       v  = aa + bs2*ai[i];
694       for ( j=0; j<M; j++ ) { /* for each block */
695         for ( k=0; k<bs2; k++ ) {
696           (*v++) *= li[k%bs];
697         }
698       }
699     }
700   }
701 
702   if (rr) {
703     VecGetArray(rr,&r); VecGetSize(rr,&rn);
704     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
705     for ( i=0; i<mbs; i++ ) { /* for each block row */
706       M  = ai[i+1] - ai[i];
707       v  = aa + bs2*ai[i];
708       for ( j=0; j<M; j++ ) { /* for each block */
709         ri = r + bs*aj[ai[i]+j];
710         for ( k=0; k<bs; k++ ) {
711           x = ri[k];
712           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
713         }
714       }
715     }
716   }
717   return 0;
718 }
719 
720 
721 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
722 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
723 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
724 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
725 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
726 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
727 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
728 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
729 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
730 
731 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
732 {
733   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
734   Scalar      *v = a->a;
735   double      sum = 0.0;
736   int         i;
737 
738   if (type == NORM_FROBENIUS) {
739     for (i=0; i<a->nz; i++ ) {
740 #if defined(PETSC_COMPLEX)
741       sum += real(conj(*v)*(*v)); v++;
742 #else
743       sum += (*v)*(*v); v++;
744 #endif
745     }
746     *norm = sqrt(sum);
747   }
748   else {
749     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
750   }
751   return 0;
752 }
753 
754 /*
755      note: This can only work for identity for row and col. It would
756    be good to check this and otherwise generate an error.
757 */
758 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
759 {
760   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
761   Mat         outA;
762   int         ierr;
763 
764   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
765 
766   outA          = inA;
767   inA->factor   = FACTOR_LU;
768   a->row        = row;
769   a->col        = col;
770 
771   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
772 
773   if (!a->diag) {
774     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
775   }
776   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
777   return 0;
778 }
779 
780 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
781 {
782   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
783   int         one = 1, totalnz = a->bs*a->bs*a->nz;
784   BLscal_( &totalnz, alpha, a->a, &one );
785   PLogFlops(totalnz);
786   return 0;
787 }
788 
789 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
790 {
791   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
792   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
793   int        *ai = a->i, *ailen = a->ilen, shift = a->indexshift;
794   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=bs*bs;
795   Scalar     *ap, *aa = a->a, zero = 0.0;
796 
797   for ( k=0; k<m; k++ ) { /* loop over rows */
798     row  = im[k]; brow = row/bs;
799     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
800     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
801     rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
802     nrow = ailen[brow];
803     for ( l=0; l<n; l++ ) { /* loop over columns */
804       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
805       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
806       col  = in[l] - shift;
807       bcol = col/bs;
808       cidx = col%bs;
809       ridx = row%bs;
810       high = nrow;
811       low  = 0; /* assume unsorted */
812       while (high-low > 5) {
813         t = (low+high)/2;
814         if (rp[t] > bcol) high = t;
815         else             low  = t;
816       }
817       for ( i=low; i<high; i++ ) {
818         if (rp[i] > bcol) break;
819         if (rp[i] == bcol) {
820           *v++ = ap[bs2*i+bs*cidx+ridx];
821           goto finished;
822         }
823       }
824       *v++ = zero;
825       finished:;
826     }
827   }
828   return 0;
829 }
830 
831 int MatPrintHelp_SeqBAIJ(Mat A)
832 {
833   static int called = 0;
834 
835   if (called) return 0; else called = 1;
836   return 0;
837 }
838 /* -------------------------------------------------------------------*/
839 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
840        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
841        MatMult_SeqBAIJ,0,
842        0,0,
843        MatSolve_SeqBAIJ,0,
844        0,0,
845        MatLUFactor_SeqBAIJ,0,
846        0,
847        0,
848        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
849        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
850        0,MatAssemblyEnd_SeqBAIJ,
851        0,
852        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
853        MatGetReordering_SeqBAIJ,
854        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
855        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
856        MatILUFactorSymbolic_SeqBAIJ,0,
857        0,0,/*  MatConvert_SeqBAIJ  */ 0,
858        0,0,
859        MatConvertSameType_SeqBAIJ,0,0,
860        MatILUFactor_SeqBAIJ,0,0,
861        0,0,
862        MatGetValues_SeqBAIJ,0,
863        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
864        0};
865 
866 /*@C
867    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
868    (the default parallel PETSc format).  For good matrix assembly performance
869    the user should preallocate the matrix storage by setting the parameter nz
870    (or nzz).  By setting these parameters accurately, performance can be
871    increased by more than a factor of 50.
872 
873    Input Parameters:
874 .  comm - MPI communicator, set to MPI_COMM_SELF
875 .  bs - size of block
876 .  m - number of rows
877 .  n - number of columns
878 .  nz - number of block nonzeros per block row (same for all rows)
879 .  nzz - number of block nonzeros per block row or PETSC_NULL
880          (possibly different for each row)
881 
882    Output Parameter:
883 .  A - the matrix
884 
885    Notes:
886    The BAIJ format, is fully compatible with standard Fortran 77
887    storage.  That is, the stored row and column indices can begin at
888    either one (as in Fortran) or zero.  See the users manual for details.
889 
890    Specify the preallocated storage with either nz or nnz (not both).
891    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
892    allocation.  For additional details, see the users manual chapter on
893    matrices and the file $(PETSC_DIR)/Performance.
894 
895 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
896 @*/
897 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
898 {
899   Mat         B;
900   Mat_SeqBAIJ *b;
901   int         i,len,ierr, flg,mbs = m/bs,bs2;
902 
903   if (mbs*bs != m)
904     SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize");
905 
906   *A      = 0;
907   bs2     = bs*bs;
908   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
909   PLogObjectCreate(B);
910   B->data             = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
911   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
912   switch (bs) {
913     case 1:
914       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
915       break;
916     case 2:
917       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
918       break;
919     case 3:
920       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
921       break;
922     case 4:
923       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
924       break;
925     case 5:
926       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
927       break;
928   }
929   B->destroy          = MatDestroy_SeqBAIJ;
930   B->view             = MatView_SeqBAIJ;
931   B->factor           = 0;
932   B->lupivotthreshold = 1.0;
933   b->row              = 0;
934   b->col              = 0;
935   b->reallocs         = 0;
936 
937   b->m       = m;
938   b->mbs     = mbs;
939   b->n       = n;
940   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
941   if (nnz == PETSC_NULL) {
942     if (nz == PETSC_DEFAULT) nz = CHUNKSIZE;
943     else if (nz <= 0)        nz = 1;
944     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
945     nz = nz*mbs;
946   }
947   else {
948     nz = 0;
949     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
950   }
951 
952   /* allocate the matrix space */
953   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
954   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
955   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
956   b->j  = (int *) (b->a + nz*bs2);
957   PetscMemzero(b->j,nz*sizeof(int));
958   b->i  = b->j + nz;
959   b->singlemalloc = 1;
960 
961   b->i[0] = 0;
962   for (i=1; i<mbs+1; i++) {
963     b->i[i] = b->i[i-1] + b->imax[i-1];
964   }
965 
966   /* b->ilen will count nonzeros in each block row so far. */
967   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
968   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
969   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
970 
971   b->bs               = bs;
972   b->mbs              = mbs;
973   b->nz               = 0;
974   b->maxnz            = nz;
975   b->sorted           = 0;
976   b->roworiented      = 1;
977   b->nonew            = 0;
978   b->diag             = 0;
979   b->solve_work       = 0;
980   b->mult_work        = 0;
981   b->spptr            = 0;
982 
983   *A = B;
984   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
985   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
986   return 0;
987 }
988 
989 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
990 {
991   Mat         C;
992   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
993   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz,bs2 = bs*bs;
994 
995   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
996 
997   *B = 0;
998   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
999   PLogObjectCreate(C);
1000   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1001   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1002   C->destroy    = MatDestroy_SeqBAIJ;
1003   C->view       = MatView_SeqBAIJ;
1004   C->factor     = A->factor;
1005   c->row        = 0;
1006   c->col        = 0;
1007   C->assembled  = PETSC_TRUE;
1008 
1009   c->m          = a->m;
1010   c->n          = a->n;
1011   c->bs         = a->bs;
1012   c->mbs        = a->mbs;
1013   c->nbs        = a->nbs;
1014 
1015   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1016   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1017   for ( i=0; i<mbs; i++ ) {
1018     c->imax[i] = a->imax[i];
1019     c->ilen[i] = a->ilen[i];
1020   }
1021 
1022   /* allocate the matrix space */
1023   c->singlemalloc = 1;
1024   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1025   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1026   c->j  = (int *) (c->a + nz*bs2);
1027   c->i  = c->j + nz;
1028   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1029   if (mbs > 0) {
1030     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1031     if (cpvalues == COPY_VALUES) {
1032       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1033     }
1034   }
1035 
1036   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1037   c->sorted      = a->sorted;
1038   c->roworiented = a->roworiented;
1039   c->nonew       = a->nonew;
1040 
1041   if (a->diag) {
1042     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1043     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1044     for ( i=0; i<mbs; i++ ) {
1045       c->diag[i] = a->diag[i];
1046     }
1047   }
1048   else c->diag          = 0;
1049   c->nz                 = a->nz;
1050   c->maxnz              = a->maxnz;
1051   c->solve_work         = 0;
1052   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1053   c->mult_work          = 0;
1054   *B = C;
1055   return 0;
1056 }
1057 
1058 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1059 {
1060   Mat_SeqBAIJ  *a;
1061   Mat          B;
1062   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1063   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1064   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1065   int          *masked, nmask,tmp,ishift,bs2;
1066   Scalar       *aa;
1067   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1068 
1069   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1070   bs2  = bs*bs;
1071 
1072   MPI_Comm_size(comm,&size);
1073   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1074   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1075   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1076   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1077   M = header[1]; N = header[2]; nz = header[3];
1078 
1079   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1080 
1081   /*
1082      This code adds extra rows to make sure the number of rows is
1083     divisible by the blocksize
1084   */
1085   mbs        = M/bs;
1086   extra_rows = bs - M + bs*(mbs);
1087   if (extra_rows == bs) extra_rows = 0;
1088   else                  mbs++;
1089   if (extra_rows) {
1090     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
1091   }
1092 
1093   /* read in row lengths */
1094   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1095   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1096   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1097 
1098   /* read in column indices */
1099   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1100   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1101   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1102 
1103   /* loop over row lengths determining block row lengths */
1104   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1105   PetscMemzero(browlengths,mbs*sizeof(int));
1106   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1107   PetscMemzero(mask,mbs*sizeof(int));
1108   masked = mask + mbs;
1109   rowcount = 0; nzcount = 0;
1110   for ( i=0; i<mbs; i++ ) {
1111     nmask = 0;
1112     for ( j=0; j<bs; j++ ) {
1113       kmax = rowlengths[rowcount];
1114       for ( k=0; k<kmax; k++ ) {
1115         tmp = jj[nzcount++]/bs;
1116         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1117       }
1118       rowcount++;
1119     }
1120     browlengths[i] += nmask;
1121     /* zero out the mask elements we set */
1122     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1123   }
1124 
1125   /* create our matrix */
1126   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1127          CHKERRQ(ierr);
1128   B = *A;
1129   a = (Mat_SeqBAIJ *) B->data;
1130 
1131   /* set matrix "i" values */
1132   a->i[0] = 0;
1133   for ( i=1; i<= mbs; i++ ) {
1134     a->i[i]      = a->i[i-1] + browlengths[i-1];
1135     a->ilen[i-1] = browlengths[i-1];
1136   }
1137   a->indexshift = 0;
1138   a->nz         = 0;
1139   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1140 
1141   /* read in nonzero values */
1142   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1143   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
1144   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1145 
1146   /* set "a" and "j" values into matrix */
1147   nzcount = 0; jcount = 0;
1148   for ( i=0; i<mbs; i++ ) {
1149     nzcountb = nzcount;
1150     nmask    = 0;
1151     for ( j=0; j<bs; j++ ) {
1152       kmax = rowlengths[i*bs+j];
1153       for ( k=0; k<kmax; k++ ) {
1154         tmp = jj[nzcount++]/bs;
1155 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1156       }
1157       rowcount++;
1158     }
1159     /* sort the masked values */
1160     PetscSortInt(nmask,masked);
1161 
1162     /* set "j" values into matrix */
1163     maskcount = 1;
1164     for ( j=0; j<nmask; j++ ) {
1165       a->j[jcount++]  = masked[j];
1166       mask[masked[j]] = maskcount++;
1167     }
1168     /* set "a" values into matrix */
1169     ishift = bs2*a->i[i];
1170     for ( j=0; j<bs; j++ ) {
1171       kmax = rowlengths[i*bs+j];
1172       for ( k=0; k<kmax; k++ ) {
1173         tmp    = jj[nzcountb]/bs ;
1174         block  = mask[tmp] - 1;
1175         point  = jj[nzcountb] - bs*tmp;
1176         idx    = ishift + bs2*block + j + bs*point;
1177         a->a[idx] = aa[nzcountb++];
1178       }
1179     }
1180     /* zero out the mask elements we set */
1181     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1182   }
1183   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1184 
1185   PetscFree(rowlengths);
1186   PetscFree(browlengths);
1187   PetscFree(aa);
1188   PetscFree(jj);
1189   PetscFree(mask);
1190 
1191   B->assembled = PETSC_TRUE;
1192 
1193   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1194   if (flg) {
1195     Viewer tviewer;
1196     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1197     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
1198     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1199     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1200   }
1201   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1202   if (flg) {
1203     Viewer tviewer;
1204     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1205     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
1206     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1207     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1208   }
1209   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1210   if (flg) {
1211     Viewer tviewer;
1212     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1213     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1214     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1215   }
1216   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1217   if (flg) {
1218     Viewer tviewer;
1219     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1220     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
1221     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1222     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1223   }
1224   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1225   if (flg) {
1226     Viewer tviewer;
1227     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
1228     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
1229     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
1230     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1231   }
1232   return 0;
1233 }
1234 
1235 
1236 
1237