xref: /petsc/src/mat/impls/baij/seq/baij.c (revision b61a851f6b27729bedaaf02efee3e442e5d67aa7)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.52 1996/06/25 16:03:11 balay Exp curfman $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "src/vec/vecimpl.h"
12 #include "src/inline/spops.h"
13 #include "petsc.h"
14 
15 extern   int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = 0;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr);
45     ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
46     PetscFree(ia); PetscFree(ja);
47   } else {
48     if (ishift == oshift) {
49       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
50     }
51     else if (ishift == -1) {
52       /* temporarily subtract 1 from i and j indices */
53       int nz = a->i[n] - 1;
54       for ( i=0; i<nz; i++ ) a->j[i]--;
55       for ( i=0; i<n+1; i++ ) a->i[i]--;
56       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
57       for ( i=0; i<nz; i++ ) a->j[i]++;
58       for ( i=0; i<n+1; i++ ) a->i[i]++;
59     } else {
60       /* temporarily add 1 to i and j indices */
61       int nz = a->i[n] - 1;
62       for ( i=0; i<nz; i++ ) a->j[i]++;
63       for ( i=0; i<n+1; i++ ) a->i[i]++;
64       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
65       for ( i=0; i<nz; i++ ) a->j[i]--;
66       for ( i=0; i<n+1; i++ ) a->i[i]--;
67     }
68   }
69   return 0;
70 }
71 
72 /*
73      Adds diagonal pointers to sparse matrix structure.
74 */
75 
76 int MatMarkDiag_SeqBAIJ(Mat A)
77 {
78   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
79   int         i,j, *diag, m = a->mbs;
80 
81   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
82   PLogObjectMemory(A,(m+1)*sizeof(int));
83   for ( i=0; i<m; i++ ) {
84     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
85       if (a->j[j] == i) {
86         diag[i] = j;
87         break;
88       }
89     }
90   }
91   a->diag = diag;
92   return 0;
93 }
94 
95 #include "draw.h"
96 #include "pinclude/pviewer.h"
97 #include "sys.h"
98 
99 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
100 {
101   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
102   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
103   Scalar      *aa;
104 
105   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
106   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
107   col_lens[0] = MAT_COOKIE;
108   col_lens[1] = a->m;
109   col_lens[2] = a->n;
110   col_lens[3] = a->nz*bs2;
111 
112   /* store lengths of each row and write (including header) to file */
113   count = 0;
114   for ( i=0; i<a->mbs; i++ ) {
115     for ( j=0; j<bs; j++ ) {
116       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
117     }
118   }
119   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
120   PetscFree(col_lens);
121 
122   /* store column indices (zero start index) */
123   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
124   count = 0;
125   for ( i=0; i<a->mbs; i++ ) {
126     for ( j=0; j<bs; j++ ) {
127       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
128         for ( l=0; l<bs; l++ ) {
129           jj[count++] = bs*a->j[k] + l;
130         }
131       }
132     }
133   }
134   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
135   PetscFree(jj);
136 
137   /* store nonzero values */
138   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
139   count = 0;
140   for ( i=0; i<a->mbs; i++ ) {
141     for ( j=0; j<bs; j++ ) {
142       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
143         for ( l=0; l<bs; l++ ) {
144           aa[count++] = a->a[bs2*k + l*bs + j];
145         }
146       }
147     }
148   }
149   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
150   PetscFree(aa);
151   return 0;
152 }
153 
154 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
155 {
156   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
157   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
158   FILE        *fd;
159   char        *outputname;
160 
161   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
162   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
163   ierr = ViewerGetFormat(viewer,&format);
164   if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) {
165     fprintf(fd,"  block size is %d\n",bs);
166   }
167   else if (format == ASCII_FORMAT_MATLAB) {
168     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
169   }
170   else if (format == ASCII_FORMAT_COMMON) {
171     for ( i=0; i<a->mbs; i++ ) {
172       for ( j=0; j<bs; j++ ) {
173         fprintf(fd,"row %d:",i*bs+j);
174         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
175           for ( l=0; l<bs; l++ ) {
176 #if defined(PETSC_COMPLEX)
177           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
178             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
179               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
180           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
181             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
182 #else
183           if (a->a[bs2*k + l*bs + j] != 0.0)
184             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
185 #endif
186           }
187         }
188         fprintf(fd,"\n");
189       }
190     }
191   }
192   else {
193     for ( i=0; i<a->mbs; i++ ) {
194       for ( j=0; j<bs; j++ ) {
195         fprintf(fd,"row %d:",i*bs+j);
196         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
197           for ( l=0; l<bs; l++ ) {
198 #if defined(PETSC_COMPLEX)
199           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
200             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
201               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
202           }
203           else {
204             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
205           }
206 #else
207             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
208 #endif
209           }
210         }
211         fprintf(fd,"\n");
212       }
213     }
214   }
215   fflush(fd);
216   return 0;
217 }
218 
219 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
220 {
221   Mat         A = (Mat) obj;
222   ViewerType  vtype;
223   int         ierr;
224 
225   if (!viewer) {
226     viewer = STDOUT_VIEWER_SELF;
227   }
228 
229   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
230   if (vtype == MATLAB_VIEWER) {
231     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
232   }
233   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
234     return MatView_SeqBAIJ_ASCII(A,viewer);
235   }
236   else if (vtype == BINARY_FILE_VIEWER) {
237     return MatView_SeqBAIJ_Binary(A,viewer);
238   }
239   else if (vtype == DRAW_VIEWER) {
240     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
241   }
242   return 0;
243 }
244 
245 #define CHUNKSIZE  10
246 
247 /* This version has row oriented v  */
248 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
249 {
250   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
251   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
252   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
253   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
254   int          ridx,cidx,bs2=a->bs2;
255   Scalar      *ap,value,*aa=a->a,*bap;
256 
257   for ( k=0; k<m; k++ ) { /* loop over added rows */
258     row  = im[k]; brow = row/bs;
259     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
260     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
261     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
262     rmax = imax[brow]; nrow = ailen[brow];
263     low = 0;
264     for ( l=0; l<n; l++ ) { /* loop over added columns */
265       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
266       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
267       col = in[l]; bcol = col/bs;
268       ridx = row % bs; cidx = col % bs;
269       if (roworiented) {
270         value = *v++;
271       }
272       else {
273         value = v[k + l*m];
274       }
275       if (!sorted) low = 0; high = nrow;
276       while (high-low > 5) {
277         t = (low+high)/2;
278         if (rp[t] > bcol) high = t;
279         else             low  = t;
280       }
281       for ( i=low; i<high; i++ ) {
282         if (rp[i] > bcol) break;
283         if (rp[i] == bcol) {
284           bap  = ap +  bs2*i + bs*cidx + ridx;
285           if (is == ADD_VALUES) *bap += value;
286           else                  *bap  = value;
287           goto noinsert;
288         }
289       }
290       if (nonew) goto noinsert;
291       if (nrow >= rmax) {
292         /* there is no extra room in row, therefore enlarge */
293         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
294         Scalar *new_a;
295 
296         /* malloc new storage space */
297         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
298         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
299         new_j   = (int *) (new_a + bs2*new_nz);
300         new_i   = new_j + new_nz;
301 
302         /* copy over old data into new slots */
303         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
304         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
305         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
306         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
307         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
308                                                            len*sizeof(int));
309         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
310         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
311         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
312                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
313         /* free up old matrix storage */
314         PetscFree(a->a);
315         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
316         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
317         a->singlemalloc = 1;
318 
319         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
320         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
321         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
322         a->maxnz += bs2*CHUNKSIZE;
323         a->reallocs++;
324         a->nz++;
325       }
326       N = nrow++ - 1;
327       /* shift up all the later entries in this row */
328       for ( ii=N; ii>=i; ii-- ) {
329         rp[ii+1] = rp[ii];
330          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
331       }
332       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
333       rp[i] = bcol;
334       ap[bs2*i + bs*cidx + ridx] = value;
335       noinsert:;
336       low = i;
337     }
338     ailen[brow] = nrow;
339   }
340   return 0;
341 }
342 
343 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
344 {
345   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
346   *m = a->m; *n = a->n;
347   return 0;
348 }
349 
350 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
351 {
352   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
353   *m = 0; *n = a->m;
354   return 0;
355 }
356 
357 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
358 {
359   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
360   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
361   Scalar      *aa,*v_i,*aa_i;
362 
363   bs  = a->bs;
364   ai  = a->i;
365   aj  = a->j;
366   aa  = a->a;
367   bs2 = a->bs2;
368 
369   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
370 
371   bn  = row/bs;   /* Block number */
372   bp  = row % bs; /* Block Position */
373   M   = ai[bn+1] - ai[bn];
374   *nz = bs*M;
375 
376   if (v) {
377     *v = 0;
378     if (*nz) {
379       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
380       for ( i=0; i<M; i++ ) { /* for each block in the block row */
381         v_i  = *v + i*bs;
382         aa_i = aa + bs2*(ai[bn] + i);
383         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
384       }
385     }
386   }
387 
388   if (idx) {
389     *idx = 0;
390     if (*nz) {
391       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
392       for ( i=0; i<M; i++ ) { /* for each block in the block row */
393         idx_i = *idx + i*bs;
394         itmp  = bs*aj[ai[bn] + i];
395         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
396       }
397     }
398   }
399   return 0;
400 }
401 
402 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
403 {
404   if (idx) {if (*idx) PetscFree(*idx);}
405   if (v)   {if (*v)   PetscFree(*v);}
406   return 0;
407 }
408 
409 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
410 {
411   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
412   Mat         C;
413   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
414   int         *rows,*cols,bs2=a->bs2;
415   Scalar      *array=a->a;
416 
417   if (B==PETSC_NULL && mbs!=nbs)
418     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
419   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
420   PetscMemzero(col,(1+nbs)*sizeof(int));
421 
422   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
423   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
424   PetscFree(col);
425   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
426   cols = rows + bs;
427   for ( i=0; i<mbs; i++ ) {
428     cols[0] = i*bs;
429     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
430     len = ai[i+1] - ai[i];
431     for ( j=0; j<len; j++ ) {
432       rows[0] = (*aj++)*bs;
433       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
434       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
435       array += bs2;
436     }
437   }
438   PetscFree(rows);
439 
440   ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
441   ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
442 
443   if (B != PETSC_NULL) {
444     *B = C;
445   } else {
446     /* This isn't really an in-place transpose */
447     PetscFree(a->a);
448     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
449     if (a->diag) PetscFree(a->diag);
450     if (a->ilen) PetscFree(a->ilen);
451     if (a->imax) PetscFree(a->imax);
452     if (a->solve_work) PetscFree(a->solve_work);
453     PetscFree(a);
454     PetscMemcpy(A,C,sizeof(struct _Mat));
455     PetscHeaderDestroy(C);
456   }
457   return 0;
458 }
459 
460 
461 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
462 {
463   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
464   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
465   int        m = a->m,*ip, N, *ailen = a->ilen;
466   int        mbs = a->mbs, bs2 = a->bs2;
467   Scalar     *aa = a->a, *ap;
468 
469   if (mode == FLUSH_ASSEMBLY) return 0;
470 
471   for ( i=1; i<mbs; i++ ) {
472     /* move each row back by the amount of empty slots (fshift) before it*/
473     fshift += imax[i-1] - ailen[i-1];
474     if (fshift) {
475       ip = aj + ai[i]; ap = aa + bs2*ai[i];
476       N = ailen[i];
477       for ( j=0; j<N; j++ ) {
478         ip[j-fshift] = ip[j];
479         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
480       }
481     }
482     ai[i] = ai[i-1] + ailen[i-1];
483   }
484   if (mbs) {
485     fshift += imax[mbs-1] - ailen[mbs-1];
486     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
487   }
488   /* reset ilen and imax for each row */
489   for ( i=0; i<mbs; i++ ) {
490     ailen[i] = imax[i] = ai[i+1] - ai[i];
491   }
492   a->nz = ai[mbs];
493 
494   /* diagonals may have moved, so kill the diagonal pointers */
495   if (fshift && a->diag) {
496     PetscFree(a->diag);
497     PLogObjectMemory(A,-(m+1)*sizeof(int));
498     a->diag = 0;
499   }
500   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space (blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs);
501   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n",
502            a->reallocs);
503   return 0;
504 }
505 
506 static int MatZeroEntries_SeqBAIJ(Mat A)
507 {
508   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
509   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
510   return 0;
511 }
512 
513 int MatDestroy_SeqBAIJ(PetscObject obj)
514 {
515   Mat         A  = (Mat) obj;
516   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
517 
518 #if defined(PETSC_LOG)
519   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
520 #endif
521   PetscFree(a->a);
522   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
523   if (a->diag) PetscFree(a->diag);
524   if (a->ilen) PetscFree(a->ilen);
525   if (a->imax) PetscFree(a->imax);
526   if (a->solve_work) PetscFree(a->solve_work);
527   if (a->mult_work) PetscFree(a->mult_work);
528   PetscFree(a);
529   PLogObjectDestroy(A);
530   PetscHeaderDestroy(A);
531   return 0;
532 }
533 
534 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
535 {
536   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
537   if      (op == ROW_ORIENTED)              a->roworiented = 1;
538   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
539   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
540   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
541   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
542   else if (op == ROWS_SORTED ||
543            op == SYMMETRIC_MATRIX ||
544            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
545            op == YES_NEW_DIAGONALS)
546     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
547   else if (op == NO_NEW_DIAGONALS)
548     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
549   else
550     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
551   return 0;
552 }
553 
554 
555 /* -------------------------------------------------------*/
556 /* Should check that shapes of vectors and matrices match */
557 /* -------------------------------------------------------*/
558 #include "pinclude/plapack.h"
559 
560 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
561 {
562   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
563   Scalar          *xg,*zg;
564   register Scalar *x,*z,*v,sum;
565   int             mbs=a->mbs,i,*idx,*ii,n,ierr;
566 
567   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
568   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
569 
570   idx   = a->j;
571   v     = a->a;
572   ii    = a->i;
573 
574   for ( i=0; i<mbs; i++ ) {
575     n    = ii[1] - ii[0]; ii++;
576     sum  = 0.0;
577     while (n--) sum += *v++ * x[*idx++];
578     z[i] = sum;
579   }
580   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
581   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
582   PLogFlops(2*a->nz - a->m);
583   return 0;
584 }
585 
586 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
587 {
588   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
589   Scalar          *xg,*zg;
590   register Scalar *x,*z,*v,*xb,sum1,sum2;
591   register Scalar x1,x2;
592   int             mbs=a->mbs,i,*idx,*ii;
593   int             j,n,ierr;
594 
595   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
596   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
597 
598   idx   = a->j;
599   v     = a->a;
600   ii    = a->i;
601 
602   for ( i=0; i<mbs; i++ ) {
603     n  = ii[1] - ii[0]; ii++;
604     sum1 = 0.0; sum2 = 0.0;
605     for ( j=0; j<n; j++ ) {
606       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
607       sum1 += v[0]*x1 + v[2]*x2;
608       sum2 += v[1]*x1 + v[3]*x2;
609       v += 4;
610     }
611     z[0] = sum1; z[1] = sum2;
612     z += 2;
613   }
614   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
615   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
616   PLogFlops(4*a->nz - a->m);
617   return 0;
618 }
619 
620 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
621 {
622   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
623   Scalar          *xg,*zg;
624   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
625   int             mbs=a->mbs,i,*idx,*ii,j,n,ierr;
626 
627   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
628   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
629 
630   idx   = a->j;
631   v     = a->a;
632   ii    = a->i;
633 
634   for ( i=0; i<mbs; i++ ) {
635     n  = ii[1] - ii[0]; ii++;
636     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
637     for ( j=0; j<n; j++ ) {
638       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
639       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
640       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
641       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
642       v += 9;
643     }
644     z[0] = sum1; z[1] = sum2; z[2] = sum3;
645     z += 3;
646   }
647   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
648   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
649   PLogFlops(18*a->nz - a->m);
650   return 0;
651 }
652 
653 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
654 {
655   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
656   Scalar          *xg,*zg;
657   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
658   register Scalar x1,x2,x3,x4;
659   int             mbs=a->mbs,i,*idx,*ii;
660   int             j,n,ierr;
661 
662   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
663   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
664 
665   idx   = a->j;
666   v     = a->a;
667   ii    = a->i;
668 
669   for ( i=0; i<mbs; i++ ) {
670     n  = ii[1] - ii[0]; ii++;
671     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
672     for ( j=0; j<n; j++ ) {
673       xb = x + 4*(*idx++);
674       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
675       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
676       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
677       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
678       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
679       v += 16;
680     }
681     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
682     z += 4;
683   }
684   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
685   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
686   PLogFlops(32*a->nz - a->m);
687   return 0;
688 }
689 
690 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
691 {
692   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
693   Scalar          *xg,*zg;
694   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
695   register Scalar x1,x2,x3,x4,x5;
696   int             mbs=a->mbs,i,*idx,*ii,j,n,ierr;
697 
698   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
699   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
700 
701   idx   = a->j;
702   v     = a->a;
703   ii    = a->i;
704 
705   for ( i=0; i<mbs; i++ ) {
706     n  = ii[1] - ii[0]; ii++;
707     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
708     for ( j=0; j<n; j++ ) {
709       xb = x + 5*(*idx++);
710       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
711       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
712       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
713       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
714       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
715       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
716       v += 25;
717     }
718     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
719     z += 5;
720   }
721   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
722   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
723   PLogFlops(50*a->nz - a->m);
724   return 0;
725 }
726 
727 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
728 {
729   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
730   Scalar          *xg,*zg;
731   register Scalar *x,*z,*v,*xb;
732   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
733   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt, _DZero = 0.0;
734 
735   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
736   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
737 
738   idx   = a->j;
739   v     = a->a;
740   ii    = a->i;
741 
742 
743   if (!a->mult_work) {
744     k = PetscMax(a->m,a->n);
745     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
746   }
747   work = a->mult_work;
748   for ( i=0; i<mbs; i++ ) {
749     n     = ii[1] - ii[0]; ii++;
750     ncols = n*bs;
751     workt = work;
752     for ( j=0; j<n; j++ ) {
753       xb = x + bs*(*idx++);
754       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
755       workt += bs;
756     }
757     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
758     v += n*bs2;
759     z += bs;
760   }
761    ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
762   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
763   PLogFlops(2*a->nz*bs2 - a->m);
764   return 0;
765 }
766 
767 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
768 {
769   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
770   Scalar          *xg,*yg,*zg;
771   register Scalar *x,*y,*z,*v,sum;
772   int             mbs=a->mbs,i,*idx,*ii,n,ierr;
773 
774   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
775   ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg;
776   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
777 
778   idx   = a->j;
779   v     = a->a;
780   ii    = a->i;
781 
782   for ( i=0; i<mbs; i++ ) {
783     n    = ii[1] - ii[0]; ii++;
784     sum  = y[i];
785     while (n--) sum += *v++ * x[*idx++];
786     z[i] = sum;
787   }
788   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
789   ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr);
790   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
791   PLogFlops(2*a->nz);
792   return 0;
793 }
794 
795 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
796 {
797   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
798   Scalar          *xg,*yg,*zg;
799   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
800   register Scalar x1,x2;
801   int             mbs=a->mbs,i,*idx,*ii;
802   int             j,n,ierr;
803 
804   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
805   ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg;
806   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
807 
808   idx   = a->j;
809   v     = a->a;
810   ii    = a->i;
811 
812   for ( i=0; i<mbs; i++ ) {
813     n  = ii[1] - ii[0]; ii++;
814     sum1 = y[0]; sum2 = y[1];
815     for ( j=0; j<n; j++ ) {
816       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
817       sum1 += v[0]*x1 + v[2]*x2;
818       sum2 += v[1]*x1 + v[3]*x2;
819       v += 4;
820     }
821     z[0] = sum1; z[1] = sum2;
822     z += 2; y += 2;
823   }
824   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
825   ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr);
826   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
827   PLogFlops(4*a->nz);
828   return 0;
829 }
830 
831 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
832 {
833   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
834   Scalar          *xg,*yg,*zg;
835   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
836   int             mbs=a->mbs,i,*idx,*ii,j,n,ierr;
837 
838   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
839   ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg;
840   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
841 
842   idx   = a->j;
843   v     = a->a;
844   ii    = a->i;
845 
846   for ( i=0; i<mbs; i++ ) {
847     n  = ii[1] - ii[0]; ii++;
848     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
849     for ( j=0; j<n; j++ ) {
850       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
851       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
852       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
853       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
854       v += 9;
855     }
856     z[0] = sum1; z[1] = sum2; z[2] = sum3;
857     z += 3; y += 3;
858   }
859   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
860   ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr);
861   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
862   PLogFlops(18*a->nz);
863   return 0;
864 }
865 
866 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
867 {
868   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
869   Scalar          *xg,*yg,*zg;
870   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
871   register Scalar x1,x2,x3,x4;
872   int             mbs=a->mbs,i,*idx,*ii;
873   int             j,n,ierr;
874 
875   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
876   ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg;
877   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
878 
879   idx   = a->j;
880   v     = a->a;
881   ii    = a->i;
882 
883   for ( i=0; i<mbs; i++ ) {
884     n  = ii[1] - ii[0]; ii++;
885     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
886     for ( j=0; j<n; j++ ) {
887       xb = x + 4*(*idx++);
888       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
889       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
890       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
891       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
892       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
893       v += 16;
894     }
895     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
896     z += 4; y += 4;
897   }
898   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
899   ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr);
900   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
901   PLogFlops(32*a->nz);
902   return 0;
903 }
904 
905 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
906 {
907   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
908   Scalar          *xg,*yg,*zg;
909   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
910   register Scalar x1,x2,x3,x4,x5;
911   int             mbs=a->mbs,i,*idx,*ii,j,n,ierr;
912 
913   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
914   ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg;
915   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
916 
917   idx   = a->j;
918   v     = a->a;
919   ii    = a->i;
920 
921   for ( i=0; i<mbs; i++ ) {
922     n  = ii[1] - ii[0]; ii++;
923     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
924     for ( j=0; j<n; j++ ) {
925       xb = x + 5*(*idx++);
926       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
927       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
928       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
929       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
930       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
931       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
932       v += 25;
933     }
934     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
935     z += 5; y += 5;
936   }
937   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
938   ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr);
939   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
940   PLogFlops(50*a->nz);
941   return 0;
942 }
943 
944 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
945 {
946   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
947   Scalar          *xg,*zg;
948   register Scalar *x,*z,*v,*xb;
949   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
950   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
951 
952   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
953 
954   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
955   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
956 
957   idx   = a->j;
958   v     = a->a;
959   ii    = a->i;
960 
961 
962   if (!a->mult_work) {
963     k = PetscMax(a->m,a->n);
964     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
965   }
966   work = a->mult_work;
967   for ( i=0; i<mbs; i++ ) {
968     n     = ii[1] - ii[0]; ii++;
969     ncols = n*bs;
970     workt = work;
971     for ( j=0; j<n; j++ ) {
972       xb = x + bs*(*idx++);
973       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
974       workt += bs;
975     }
976     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
977     v += n*bs2;
978     z += bs;
979   }
980   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
981   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
982   PLogFlops(2*a->nz*bs2 );
983   return 0;
984 }
985 
986 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
987 {
988   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
989   Scalar          *xg,*zg,*zb;
990   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
991   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
992   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
993 
994 
995   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
996   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
997   PetscMemzero(z,N*sizeof(Scalar));
998 
999   idx   = a->j;
1000   v     = a->a;
1001   ii    = a->i;
1002 
1003   switch (bs) {
1004   case 1:
1005     for ( i=0; i<mbs; i++ ) {
1006       n  = ii[1] - ii[0]; ii++;
1007       xb = x + i; x1 = xb[0];
1008       ib = idx + ai[i];
1009       for ( j=0; j<n; j++ ) {
1010         rval    = ib[j];
1011         z[rval] += *v++ * x1;
1012       }
1013     }
1014     break;
1015   case 2:
1016     for ( i=0; i<mbs; i++ ) {
1017       n  = ii[1] - ii[0]; ii++;
1018       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1019       ib = idx + ai[i];
1020       for ( j=0; j<n; j++ ) {
1021         rval      = ib[j]*2;
1022         z[rval++] += v[0]*x1 + v[1]*x2;
1023         z[rval++] += v[2]*x1 + v[3]*x2;
1024         v += 4;
1025       }
1026     }
1027     break;
1028   case 3:
1029     for ( i=0; i<mbs; i++ ) {
1030       n  = ii[1] - ii[0]; ii++;
1031       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1032       ib = idx + ai[i];
1033       for ( j=0; j<n; j++ ) {
1034         rval      = ib[j]*3;
1035         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1036         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1037         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1038         v += 9;
1039       }
1040     }
1041     break;
1042   case 4:
1043     for ( i=0; i<mbs; i++ ) {
1044       n  = ii[1] - ii[0]; ii++;
1045       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1046       ib = idx + ai[i];
1047       for ( j=0; j<n; j++ ) {
1048         rval      = ib[j]*4;
1049         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1050         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1051         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1052         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1053         v += 16;
1054       }
1055     }
1056     break;
1057   case 5:
1058     for ( i=0; i<mbs; i++ ) {
1059       n  = ii[1] - ii[0]; ii++;
1060       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1061       x4 = xb[3];   x5 = xb[4];
1062       ib = idx + ai[i];
1063       for ( j=0; j<n; j++ ) {
1064         rval      = ib[j]*5;
1065         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1066         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1067         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1068         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1069         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1070         v += 25;
1071       }
1072     }
1073     break;
1074       /* block sizes larger then 5 by 5 are handled by BLAS */
1075     default: {
1076       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1077       if (!a->mult_work) {
1078         k = PetscMax(a->m,a->n);
1079         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1080         CHKPTRQ(a->mult_work);
1081       }
1082       work = a->mult_work;
1083       for ( i=0; i<mbs; i++ ) {
1084         n     = ii[1] - ii[0]; ii++;
1085         ncols = n*bs;
1086         PetscMemzero(work,ncols*sizeof(Scalar));
1087         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1088         v += n*bs2;
1089         x += bs;
1090         workt = work;
1091         for ( j=0; j<n; j++ ) {
1092           zb = z + bs*(*idx++);
1093           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1094           workt += bs;
1095         }
1096       }
1097     }
1098   }
1099   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1100   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1101   PLogFlops(2*a->nz*a->bs2 - a->n);
1102   return 0;
1103 }
1104 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1105 {
1106   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1107   Scalar          *xg,*zg,*zb;
1108   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1109   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1110   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1111 
1112 
1113 
1114   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1115   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1116 
1117   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1118   else PetscMemzero(z,N*sizeof(Scalar));
1119 
1120 
1121   idx   = a->j;
1122   v     = a->a;
1123   ii    = a->i;
1124 
1125   switch (bs) {
1126   case 1:
1127     for ( i=0; i<mbs; i++ ) {
1128       n  = ii[1] - ii[0]; ii++;
1129       xb = x + i; x1 = xb[0];
1130       ib = idx + ai[i];
1131       for ( j=0; j<n; j++ ) {
1132         rval    = ib[j];
1133         z[rval] += *v++ * x1;
1134       }
1135     }
1136     break;
1137   case 2:
1138     for ( i=0; i<mbs; i++ ) {
1139       n  = ii[1] - ii[0]; ii++;
1140       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1141       ib = idx + ai[i];
1142       for ( j=0; j<n; j++ ) {
1143         rval      = ib[j]*2;
1144         z[rval++] += v[0]*x1 + v[1]*x2;
1145         z[rval++] += v[2]*x1 + v[3]*x2;
1146         v += 4;
1147       }
1148     }
1149     break;
1150   case 3:
1151     for ( i=0; i<mbs; i++ ) {
1152       n  = ii[1] - ii[0]; ii++;
1153       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1154       ib = idx + ai[i];
1155       for ( j=0; j<n; j++ ) {
1156         rval      = ib[j]*3;
1157         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1158         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1159         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1160         v += 9;
1161       }
1162     }
1163     break;
1164   case 4:
1165     for ( i=0; i<mbs; i++ ) {
1166       n  = ii[1] - ii[0]; ii++;
1167       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1168       ib = idx + ai[i];
1169       for ( j=0; j<n; j++ ) {
1170         rval      = ib[j]*4;
1171         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1172         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1173         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1174         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1175         v += 16;
1176       }
1177     }
1178     break;
1179   case 5:
1180     for ( i=0; i<mbs; i++ ) {
1181       n  = ii[1] - ii[0]; ii++;
1182       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1183       x4 = xb[3];   x5 = xb[4];
1184       ib = idx + ai[i];
1185       for ( j=0; j<n; j++ ) {
1186         rval      = ib[j]*5;
1187         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1188         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1189         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1190         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1191         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1192         v += 25;
1193       }
1194     }
1195     break;
1196       /* block sizes larger then 5 by 5 are handled by BLAS */
1197     default: {
1198       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1199       if (!a->mult_work) {
1200         k = PetscMax(a->m,a->n);
1201         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1202         CHKPTRQ(a->mult_work);
1203       }
1204       work = a->mult_work;
1205       for ( i=0; i<mbs; i++ ) {
1206         n     = ii[1] - ii[0]; ii++;
1207         ncols = n*bs;
1208         PetscMemzero(work,ncols*sizeof(Scalar));
1209         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1210         v += n*bs2;
1211         x += bs;
1212         workt = work;
1213         for ( j=0; j<n; j++ ) {
1214           zb = z + bs*(*idx++);
1215           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1216           workt += bs;
1217         }
1218       }
1219     }
1220   }
1221   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1222   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1223   PLogFlops(2*a->nz*a->bs2);
1224   return 0;
1225 }
1226 
1227 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
1228 {
1229   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1230   if (nz)  *nz  = a->bs2*a->nz;
1231   if (nza) *nza = a->maxnz;
1232   if (mem) *mem = (int)A->mem;
1233   return 0;
1234 }
1235 
1236 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1237 {
1238   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1239 
1240   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
1241 
1242   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1243   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1244       (a->nz != b->nz)) {
1245     *flg = PETSC_FALSE; return 0;
1246   }
1247 
1248   /* if the a->i are the same */
1249   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1250     *flg = PETSC_FALSE; return 0;
1251   }
1252 
1253   /* if a->j are the same */
1254   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1255     *flg = PETSC_FALSE; return 0;
1256   }
1257 
1258   /* if a->a are the same */
1259   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1260     *flg = PETSC_FALSE; return 0;
1261   }
1262   *flg = PETSC_TRUE;
1263   return 0;
1264 
1265 }
1266 
1267 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1268 {
1269   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1270   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1271   Scalar      *x, zero = 0.0,*aa,*aa_j;
1272 
1273   bs   = a->bs;
1274   aa   = a->a;
1275   ai   = a->i;
1276   aj   = a->j;
1277   ambs = a->mbs;
1278   bs2  = a->bs2;
1279 
1280   VecSet(&zero,v);
1281   VecGetArray(v,&x); VecGetLocalSize(v,&n);
1282   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
1283   for ( i=0; i<ambs; i++ ) {
1284     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1285       if (aj[j] == i) {
1286         row  = i*bs;
1287         aa_j = aa+j*bs2;
1288         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1289         break;
1290       }
1291     }
1292   }
1293   return 0;
1294 }
1295 
1296 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1297 {
1298   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1299   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1300   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1301 
1302   ai  = a->i;
1303   aj  = a->j;
1304   aa  = a->a;
1305   m   = a->m;
1306   n   = a->n;
1307   bs  = a->bs;
1308   mbs = a->mbs;
1309   bs2 = a->bs2;
1310   if (ll) {
1311     VecGetArray(ll,&l); VecGetSize(ll,&lm);
1312     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
1313     for ( i=0; i<mbs; i++ ) { /* for each block row */
1314       M  = ai[i+1] - ai[i];
1315       li = l + i*bs;
1316       v  = aa + bs2*ai[i];
1317       for ( j=0; j<M; j++ ) { /* for each block */
1318         for ( k=0; k<bs2; k++ ) {
1319           (*v++) *= li[k%bs];
1320         }
1321       }
1322     }
1323   }
1324 
1325   if (rr) {
1326     VecGetArray(rr,&r); VecGetSize(rr,&rn);
1327     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
1328     for ( i=0; i<mbs; i++ ) { /* for each block row */
1329       M  = ai[i+1] - ai[i];
1330       v  = aa + bs2*ai[i];
1331       for ( j=0; j<M; j++ ) { /* for each block */
1332         ri = r + bs*aj[ai[i]+j];
1333         for ( k=0; k<bs; k++ ) {
1334           x = ri[k];
1335           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1336         }
1337       }
1338     }
1339   }
1340   return 0;
1341 }
1342 
1343 
1344 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1345 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1346 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1347 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1348 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1349 
1350 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1351 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1352 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1353 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1354 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1355 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1356 
1357 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1358 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1359 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1360 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1361 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1362 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1363 
1364 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1365 {
1366   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1367   Scalar      *v = a->a;
1368   double      sum = 0.0;
1369   int         i,nz=a->nz,bs2=a->bs2;
1370 
1371   if (type == NORM_FROBENIUS) {
1372     for (i=0; i< bs2*nz; i++ ) {
1373 #if defined(PETSC_COMPLEX)
1374       sum += real(conj(*v)*(*v)); v++;
1375 #else
1376       sum += (*v)*(*v); v++;
1377 #endif
1378     }
1379     *norm = sqrt(sum);
1380   }
1381   else {
1382     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
1383   }
1384   return 0;
1385 }
1386 
1387 /*
1388      note: This can only work for identity for row and col. It would
1389    be good to check this and otherwise generate an error.
1390 */
1391 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1392 {
1393   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1394   Mat         outA;
1395   int         ierr;
1396 
1397   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
1398 
1399   outA          = inA;
1400   inA->factor   = FACTOR_LU;
1401   a->row        = row;
1402   a->col        = col;
1403 
1404   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1405 
1406   if (!a->diag) {
1407     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1408   }
1409   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1410   return 0;
1411 }
1412 
1413 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1414 {
1415   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1416   int         one = 1, totalnz = a->bs2*a->nz;
1417   BLscal_( &totalnz, alpha, a->a, &one );
1418   PLogFlops(totalnz);
1419   return 0;
1420 }
1421 
1422 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1423 {
1424   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1425   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1426   int        *ai = a->i, *ailen = a->ilen;
1427   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1428   Scalar     *ap, *aa = a->a, zero = 0.0;
1429 
1430   for ( k=0; k<m; k++ ) { /* loop over rows */
1431     row  = im[k]; brow = row/bs;
1432     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
1433     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1434     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1435     nrow = ailen[brow];
1436     for ( l=0; l<n; l++ ) { /* loop over columns */
1437       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1438       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1439       col  = in[l] ;
1440       bcol = col/bs;
1441       cidx = col%bs;
1442       ridx = row%bs;
1443       high = nrow;
1444       low  = 0; /* assume unsorted */
1445       while (high-low > 5) {
1446         t = (low+high)/2;
1447         if (rp[t] > bcol) high = t;
1448         else             low  = t;
1449       }
1450       for ( i=low; i<high; i++ ) {
1451         if (rp[i] > bcol) break;
1452         if (rp[i] == bcol) {
1453           *v++ = ap[bs2*i+bs*cidx+ridx];
1454           goto finished;
1455         }
1456       }
1457       *v++ = zero;
1458       finished:;
1459     }
1460   }
1461   return 0;
1462 }
1463 
1464 /* -------------------------------------------------------------------*/
1465 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1466        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1467        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1468        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1469        MatSolve_SeqBAIJ_N,0,
1470        0,0,
1471        MatLUFactor_SeqBAIJ,0,
1472        0,
1473        MatTranspose_SeqBAIJ,
1474        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1475        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1476        0,MatAssemblyEnd_SeqBAIJ,
1477        0,
1478        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
1479        MatGetReordering_SeqBAIJ,
1480        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1481        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1482        MatILUFactorSymbolic_SeqBAIJ,0,
1483        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1484        MatGetSubMatrix_SeqBAIJ,0,
1485        MatConvertSameType_SeqBAIJ,0,0,
1486        MatILUFactor_SeqBAIJ,0,0,
1487        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
1488        MatGetValues_SeqBAIJ,0,
1489        0,MatScale_SeqBAIJ,
1490        0};
1491 
1492 /*@C
1493    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1494    compressed row) format.  For good matrix assembly performance the
1495    user should preallocate the matrix storage by setting the parameter nz
1496    (or the array nzz).  By setting these parameters accurately, performance
1497    during matrix assembly can be increased by more than a factor of 50.
1498 
1499    Input Parameters:
1500 .  comm - MPI communicator, set to MPI_COMM_SELF
1501 .  bs - size of block
1502 .  m - number of rows
1503 .  n - number of columns
1504 .  nz - number of block nonzeros per block row (same for all rows)
1505 .  nzz - array containing the number of block nonzeros in the various block rows
1506          (possibly different for each block row) or PETSC_NULL
1507 
1508    Output Parameter:
1509 .  A - the matrix
1510 
1511    Notes:
1512    The block AIJ format is fully compatible with standard Fortran 77
1513    storage.  That is, the stored row and column indices can begin at
1514    either one (as in Fortran) or zero.  See the users' manual for details.
1515 
1516    Specify the preallocated storage with either nz or nnz (not both).
1517    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1518    allocation.  For additional details, see the users manual chapter on
1519    matrices and the file $(PETSC_DIR)/Performance.
1520 
1521 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1522 @*/
1523 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1524 {
1525   Mat         B;
1526   Mat_SeqBAIJ *b;
1527   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1528 
1529   if (mbs*bs!=m || nbs*bs!=n)
1530     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1531 
1532   *A = 0;
1533   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1534   PLogObjectCreate(B);
1535   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1536   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1537   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1538   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1539   if (!flg) {
1540     switch (bs) {
1541       case 1:
1542         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1543         B->ops.solve           = MatSolve_SeqBAIJ_1;
1544         B->ops.mult            = MatMult_SeqBAIJ_1;
1545         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
1546        break;
1547       case 2:
1548         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1549         B->ops.solve           = MatSolve_SeqBAIJ_2;
1550         B->ops.mult            = MatMult_SeqBAIJ_2;
1551         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
1552         break;
1553       case 3:
1554         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1555         B->ops.solve           = MatSolve_SeqBAIJ_3;
1556         B->ops.mult            = MatMult_SeqBAIJ_3;
1557         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
1558         break;
1559       case 4:
1560         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1561         B->ops.solve           = MatSolve_SeqBAIJ_4;
1562         B->ops.mult            = MatMult_SeqBAIJ_4;
1563         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
1564         break;
1565       case 5:
1566         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1567         B->ops.solve           = MatSolve_SeqBAIJ_5;
1568         B->ops.mult            = MatMult_SeqBAIJ_5;
1569         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
1570         break;
1571     }
1572   }
1573   B->destroy          = MatDestroy_SeqBAIJ;
1574   B->view             = MatView_SeqBAIJ;
1575   B->factor           = 0;
1576   B->lupivotthreshold = 1.0;
1577   b->row              = 0;
1578   b->col              = 0;
1579   b->reallocs         = 0;
1580 
1581   b->m       = m; B->m = m; B->M = m;
1582   b->n       = n; B->n = n; B->N = n;
1583   b->mbs     = mbs;
1584   b->nbs     = nbs;
1585   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1586   if (nnz == PETSC_NULL) {
1587     if (nz == PETSC_DEFAULT) nz = 5;
1588     else if (nz <= 0)        nz = 1;
1589     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1590     nz = nz*mbs;
1591   }
1592   else {
1593     nz = 0;
1594     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1595   }
1596 
1597   /* allocate the matrix space */
1598   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1599   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1600   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1601   b->j  = (int *) (b->a + nz*bs2);
1602   PetscMemzero(b->j,nz*sizeof(int));
1603   b->i  = b->j + nz;
1604   b->singlemalloc = 1;
1605 
1606   b->i[0] = 0;
1607   for (i=1; i<mbs+1; i++) {
1608     b->i[i] = b->i[i-1] + b->imax[i-1];
1609   }
1610 
1611   /* b->ilen will count nonzeros in each block row so far. */
1612   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1613   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1614   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1615 
1616   b->bs               = bs;
1617   b->bs2              = bs2;
1618   b->mbs              = mbs;
1619   b->nz               = 0;
1620   b->maxnz            = nz*bs2;
1621   b->sorted           = 0;
1622   b->roworiented      = 1;
1623   b->nonew            = 0;
1624   b->diag             = 0;
1625   b->solve_work       = 0;
1626   b->mult_work        = 0;
1627   b->spptr            = 0;
1628   *A = B;
1629   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1630   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1631   return 0;
1632 }
1633 
1634 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1635 {
1636   Mat         C;
1637   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1638   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1639 
1640   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1641 
1642   *B = 0;
1643   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1644   PLogObjectCreate(C);
1645   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1646   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1647   C->destroy    = MatDestroy_SeqBAIJ;
1648   C->view       = MatView_SeqBAIJ;
1649   C->factor     = A->factor;
1650   c->row        = 0;
1651   c->col        = 0;
1652   C->assembled  = PETSC_TRUE;
1653 
1654   c->m = C->m   = a->m;
1655   c->n = C->n   = a->n;
1656   C->M          = a->m;
1657   C->N          = a->n;
1658 
1659   c->bs         = a->bs;
1660   c->bs2        = a->bs2;
1661   c->mbs        = a->mbs;
1662   c->nbs        = a->nbs;
1663 
1664   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1665   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1666   for ( i=0; i<mbs; i++ ) {
1667     c->imax[i] = a->imax[i];
1668     c->ilen[i] = a->ilen[i];
1669   }
1670 
1671   /* allocate the matrix space */
1672   c->singlemalloc = 1;
1673   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1674   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1675   c->j  = (int *) (c->a + nz*bs2);
1676   c->i  = c->j + nz;
1677   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1678   if (mbs > 0) {
1679     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1680     if (cpvalues == COPY_VALUES) {
1681       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1682     }
1683   }
1684 
1685   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1686   c->sorted      = a->sorted;
1687   c->roworiented = a->roworiented;
1688   c->nonew       = a->nonew;
1689 
1690   if (a->diag) {
1691     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1692     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1693     for ( i=0; i<mbs; i++ ) {
1694       c->diag[i] = a->diag[i];
1695     }
1696   }
1697   else c->diag          = 0;
1698   c->nz                 = a->nz;
1699   c->maxnz              = a->maxnz;
1700   c->solve_work         = 0;
1701   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1702   c->mult_work          = 0;
1703   *B = C;
1704   return 0;
1705 }
1706 
1707 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1708 {
1709   Mat_SeqBAIJ  *a;
1710   Mat          B;
1711   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1712   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1713   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1714   int          *masked, nmask,tmp,bs2,ishift;
1715   Scalar       *aa;
1716   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1717 
1718   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1719   bs2  = bs*bs;
1720 
1721   MPI_Comm_size(comm,&size);
1722   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1723   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1724   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1725   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1726   M = header[1]; N = header[2]; nz = header[3];
1727 
1728   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1729 
1730   /*
1731      This code adds extra rows to make sure the number of rows is
1732     divisible by the blocksize
1733   */
1734   mbs        = M/bs;
1735   extra_rows = bs - M + bs*(mbs);
1736   if (extra_rows == bs) extra_rows = 0;
1737   else                  mbs++;
1738   if (extra_rows) {
1739     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
1740   }
1741 
1742   /* read in row lengths */
1743   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1744   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1745   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1746 
1747   /* read in column indices */
1748   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1749   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1750   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1751 
1752   /* loop over row lengths determining block row lengths */
1753   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1754   PetscMemzero(browlengths,mbs*sizeof(int));
1755   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1756   PetscMemzero(mask,mbs*sizeof(int));
1757   masked = mask + mbs;
1758   rowcount = 0; nzcount = 0;
1759   for ( i=0; i<mbs; i++ ) {
1760     nmask = 0;
1761     for ( j=0; j<bs; j++ ) {
1762       kmax = rowlengths[rowcount];
1763       for ( k=0; k<kmax; k++ ) {
1764         tmp = jj[nzcount++]/bs;
1765         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1766       }
1767       rowcount++;
1768     }
1769     browlengths[i] += nmask;
1770     /* zero out the mask elements we set */
1771     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1772   }
1773 
1774   /* create our matrix */
1775   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1776          CHKERRQ(ierr);
1777   B = *A;
1778   a = (Mat_SeqBAIJ *) B->data;
1779 
1780   /* set matrix "i" values */
1781   a->i[0] = 0;
1782   for ( i=1; i<= mbs; i++ ) {
1783     a->i[i]      = a->i[i-1] + browlengths[i-1];
1784     a->ilen[i-1] = browlengths[i-1];
1785   }
1786   a->nz         = 0;
1787   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1788 
1789   /* read in nonzero values */
1790   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1791   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
1792   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1793 
1794   /* set "a" and "j" values into matrix */
1795   nzcount = 0; jcount = 0;
1796   for ( i=0; i<mbs; i++ ) {
1797     nzcountb = nzcount;
1798     nmask    = 0;
1799     for ( j=0; j<bs; j++ ) {
1800       kmax = rowlengths[i*bs+j];
1801       for ( k=0; k<kmax; k++ ) {
1802         tmp = jj[nzcount++]/bs;
1803 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1804       }
1805       rowcount++;
1806     }
1807     /* sort the masked values */
1808     PetscSortInt(nmask,masked);
1809 
1810     /* set "j" values into matrix */
1811     maskcount = 1;
1812     for ( j=0; j<nmask; j++ ) {
1813       a->j[jcount++]  = masked[j];
1814       mask[masked[j]] = maskcount++;
1815     }
1816     /* set "a" values into matrix */
1817     ishift = bs2*a->i[i];
1818     for ( j=0; j<bs; j++ ) {
1819       kmax = rowlengths[i*bs+j];
1820       for ( k=0; k<kmax; k++ ) {
1821         tmp    = jj[nzcountb]/bs ;
1822         block  = mask[tmp] - 1;
1823         point  = jj[nzcountb] - bs*tmp;
1824         idx    = ishift + bs2*block + j + bs*point;
1825         a->a[idx] = aa[nzcountb++];
1826       }
1827     }
1828     /* zero out the mask elements we set */
1829     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1830   }
1831   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1832 
1833   PetscFree(rowlengths);
1834   PetscFree(browlengths);
1835   PetscFree(aa);
1836   PetscFree(jj);
1837   PetscFree(mask);
1838 
1839   B->assembled = PETSC_TRUE;
1840 
1841   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1842   if (flg) {
1843     Viewer tviewer;
1844     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1845     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
1846     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1847     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1848   }
1849   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1850   if (flg) {
1851     Viewer tviewer;
1852     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1853     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
1854     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1855     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1856   }
1857   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1858   if (flg) {
1859     Viewer tviewer;
1860     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1861     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1862     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1863   }
1864   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1865   if (flg) {
1866     Viewer tviewer;
1867     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1868     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
1869     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1870     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1871   }
1872   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1873   if (flg) {
1874     Viewer tviewer;
1875     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
1876     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
1877     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
1878     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1879   }
1880   return 0;
1881 }
1882 
1883 
1884 
1885