xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 0419e6cdc609cca2c5f755ee738b150a89e0128c)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.33 1996/04/09 16:17:21 balay Exp balay $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 #define  max(a,b) (a>b ? a:b)
15 extern   int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = a->indexshift;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45     CHKERRQ(ierr);
46     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
47     PetscFree(ia); PetscFree(ja);
48   } else {
49     if (ishift == oshift) {
50       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51     }
52     else if (ishift == -1) {
53       /* temporarily subtract 1 from i and j indices */
54       int nz = a->i[a->n] - 1;
55       for ( i=0; i<nz; i++ ) a->j[i]--;
56       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58       for ( i=0; i<nz; i++ ) a->j[i]++;
59       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60     } else {
61       /* temporarily add 1 to i and j indices */
62       int nz = a->i[a->n] - 1;
63       for ( i=0; i<nz; i++ ) a->j[i]++;
64       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66       for ( i=0; i<nz; i++ ) a->j[i]--;
67       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68     }
69   }
70   return 0;
71 }
72 
73 /*
74      Adds diagonal pointers to sparse matrix structure.
75 */
76 
77 int MatMarkDiag_SeqBAIJ(Mat A)
78 {
79   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80   int         i,j, *diag, m = a->mbs;
81 
82   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83   PLogObjectMemory(A,(m+1)*sizeof(int));
84   for ( i=0; i<m; i++ ) {
85     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86       if (a->j[j] == i) {
87         diag[i] = j;
88         break;
89       }
90     }
91   }
92   a->diag = diag;
93   return 0;
94 }
95 
96 #include "draw.h"
97 #include "pinclude/pviewer.h"
98 #include "sys.h"
99 
100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
101 {
102   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
103   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=bs*bs;
104   Scalar      *aa;
105 
106   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
107   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
108   col_lens[0] = MAT_COOKIE;
109   col_lens[1] = a->m;
110   col_lens[2] = a->n;
111   col_lens[3] = a->nz*bs2;
112 
113   /* store lengths of each row and write (including header) to file */
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118     }
119   }
120   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
121   PetscFree(col_lens);
122 
123   /* store column indices (zero start index) */
124   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
125   count = 0;
126   for ( i=0; i<a->mbs; i++ ) {
127     for ( j=0; j<bs; j++ ) {
128       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129         for ( l=0; l<bs; l++ ) {
130           jj[count++] = bs*a->j[k] + l;
131         }
132       }
133     }
134   }
135   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
136   PetscFree(jj);
137 
138   /* store nonzero values */
139   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
140   count = 0;
141   for ( i=0; i<a->mbs; i++ ) {
142     for ( j=0; j<bs; j++ ) {
143       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144         for ( l=0; l<bs; l++ ) {
145           aa[count++] = a->a[bs2*k + l*bs + j];
146         }
147       }
148     }
149   }
150   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
151   PetscFree(aa);
152   return 0;
153 }
154 
155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
156 {
157   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
158   int         ierr, i,j,format,bs = a->bs,k,l,bs2=bs*bs;
159   FILE        *fd;
160   char        *outputname;
161 
162   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
163   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
164   ierr = ViewerGetFormat(viewer,&format);
165   if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) {
166     fprintf(fd,"  block size is %d\n",bs);
167   }
168   else if (format == ASCII_FORMAT_MATLAB) {
169     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
170   }
171   else if (format == ASCII_FORMAT_COMMON) {
172     for ( i=0; i<a->mbs; i++ ) {
173       for ( j=0; j<bs; j++ ) {
174         fprintf(fd,"row %d:",i*bs+j);
175         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
176           for ( l=0; l<bs; l++ ) {
177 #if defined(PETSC_COMPLEX)
178           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
179             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
180               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
181           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
182             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
183 #else
184           if (a->a[bs2*k + l*bs + j] != 0.0)
185             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
186 #endif
187           }
188         }
189         fprintf(fd,"\n");
190       }
191     }
192   }
193   else {
194     for ( i=0; i<a->mbs; i++ ) {
195       for ( j=0; j<bs; j++ ) {
196         fprintf(fd,"row %d:",i*bs+j);
197         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
198           for ( l=0; l<bs; l++ ) {
199 #if defined(PETSC_COMPLEX)
200           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
201             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
202               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
203           }
204           else {
205             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
206           }
207 #else
208             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
209 #endif
210           }
211         }
212         fprintf(fd,"\n");
213       }
214     }
215   }
216   fflush(fd);
217   return 0;
218 }
219 
220 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
221 {
222   Mat         A = (Mat) obj;
223   ViewerType  vtype;
224   int         ierr;
225 
226   if (!viewer) {
227     viewer = STDOUT_VIEWER_SELF;
228   }
229 
230   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
231   if (vtype == MATLAB_VIEWER) {
232     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
233   }
234   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
235     return MatView_SeqBAIJ_ASCII(A,viewer);
236   }
237   else if (vtype == BINARY_FILE_VIEWER) {
238     return MatView_SeqBAIJ_Binary(A,viewer);
239   }
240   else if (vtype == DRAW_VIEWER) {
241     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
242   }
243   return 0;
244 }
245 
246 #define CHUNKSIZE  10
247 
248 /* This version has row oriented v  */
249 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
250 {
251   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
252   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
253   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
254   int         *aj=a->j,nonew=a->nonew,shift=a->indexshift,bs=a->bs,brow,bcol;
255   int          ridx,cidx,bs2=bs*bs;
256   Scalar      *ap,value,*aa=a->a,*bap;
257 
258   for ( k=0; k<m; k++ ) { /* loop over added rows */
259     row  = im[k]; brow = row/bs;
260     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
261     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
262     rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
263     rmax = imax[brow]; nrow = ailen[brow];
264     low = 0;
265     for ( l=0; l<n; l++ ) { /* loop over added columns */
266       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
267       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
268       col = in[l] - shift; bcol = col/bs;
269       ridx = row % bs; cidx = col % bs;
270       if (roworiented) {
271         value = *v++;
272       }
273       else {
274         value = v[k + l*m];
275       }
276       if (!sorted) low = 0; high = nrow;
277       while (high-low > 5) {
278         t = (low+high)/2;
279         if (rp[t] > bcol) high = t;
280         else             low  = t;
281       }
282       for ( i=low; i<high; i++ ) {
283         if (rp[i] > bcol) break;
284         if (rp[i] == bcol) {
285           bap  = ap +  bs2*i + bs*cidx + ridx;
286           if (is == ADD_VALUES) *bap += value;
287           else                  *bap  = value;
288           goto noinsert;
289         }
290       }
291       if (nonew) goto noinsert;
292       if (nrow >= rmax) {
293         /* there is no extra room in row, therefore enlarge */
294         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
295         Scalar *new_a;
296 
297         /* malloc new storage space */
298         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
299         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
300         new_j   = (int *) (new_a + bs2*new_nz);
301         new_i   = new_j + new_nz;
302 
303         /* copy over old data into new slots */
304         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
305         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
306         PetscMemcpy(new_j,aj,(ai[brow]+nrow+shift)*sizeof(int));
307         len = (new_nz - CHUNKSIZE - ai[brow] - nrow - shift);
308         PetscMemcpy(new_j+ai[brow]+shift+nrow+CHUNKSIZE,aj+ai[brow]+shift+nrow,
309                                                            len*sizeof(int));
310         PetscMemcpy(new_a,aa,(ai[brow]+nrow+shift)*bs2*sizeof(Scalar));
311         PetscMemzero(new_a+bs2*(ai[brow]+shift+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
312         PetscMemcpy(new_a+bs2*(ai[brow]+shift+nrow+CHUNKSIZE),
313                     aa+bs2*(ai[brow]+shift+nrow),bs2*len*sizeof(Scalar));
314         /* free up old matrix storage */
315         PetscFree(a->a);
316         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
317         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
318         a->singlemalloc = 1;
319 
320         rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
321         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
322         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
323         a->maxnz += CHUNKSIZE;
324         a->reallocs++;
325         a->nz++;
326       }
327       N = nrow++ - 1;
328       /* shift up all the later entries in this row */
329       for ( ii=N; ii>=i; ii-- ) {
330         rp[ii+1] = rp[ii];
331          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
332       }
333       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
334       rp[i] = bcol;
335       ap[bs2*i + bs*cidx + ridx] = value;
336       noinsert:;
337       low = i;
338     }
339     ailen[brow] = nrow;
340   }
341   return 0;
342 }
343 
344 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
345 {
346   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
347   *m = a->m; *n = a->n;
348   return 0;
349 }
350 
351 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
352 {
353   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
354   *m = 0; *n = a->m;
355   return 0;
356 }
357 
358 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
359 {
360   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
361   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
362   Scalar      *aa,*v_i,*aa_i;
363 
364   bs  = a->bs;
365   ai  = a->i;
366   aj  = a->j;
367   aa  = a->a;
368   bs2 = bs*bs;
369 
370   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
371 
372   bn  = row/bs;   /* Block number */
373   bp  = row % bs; /* Block Position */
374   M   = ai[bn+1] - ai[bn];
375   *nz = bs*M;
376 
377   if (v) {
378     *v = 0;
379     if (*nz) {
380       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
381       for ( i=0; i<M; i++ ) { /* for each block in the block row */
382         v_i  = *v + i*bs;
383         aa_i = aa + bs2*(ai[bn] + i);
384         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
385       }
386     }
387   }
388 
389   if (idx) {
390     *idx = 0;
391     if (*nz) {
392       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
393       for ( i=0; i<M; i++ ) { /* for each block in the block row */
394         idx_i = *idx + i*bs;
395         itmp  = bs*aj[ai[bn] + i];
396         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
397       }
398     }
399   }
400   return 0;
401 }
402 
403 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
404 {
405   if (idx) {if (*idx) PetscFree(*idx);}
406   if (v)   {if (*v)   PetscFree(*v);}
407   return 0;
408 }
409 
410 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
411 {
412   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
413   Mat         C;
414   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
415   int         shift=a->indexshift,*rows,*cols,bs2=bs*bs;
416   Scalar      *array=a->a;
417 
418   if (B==PETSC_NULL && mbs!=nbs)
419     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
420   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
421   PetscMemzero(col,(1+nbs)*sizeof(int));
422   if (shift) {
423     for ( i=0; i<ai[mbs]-1; i++ ) aj[i] -= 1;
424   }
425   for ( i=0; i<ai[mbs]+shift; i++ ) col[aj[i]] += 1;
426   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
427   PetscFree(col);
428   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
429   cols = rows + bs;
430   for ( i=0; i<mbs; i++ ) {
431     cols[0] = i*bs;
432     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
433     len = ai[i+1] - ai[i];
434     for ( j=0; j<len; j++ ) {
435       rows[0] = (*aj++)*bs;
436       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
437       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
438       array += bs2;
439     }
440   }
441   PetscFree(rows);
442   if (shift) {
443     for ( i=0; i<ai[mbs]-1; i++ ) aj[i] += 1;
444   }
445 
446   ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
447   ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
448 
449   if (B != PETSC_NULL) {
450     *B = C;
451   } else {
452     /* This isn't really an in-place transpose */
453     PetscFree(a->a);
454     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
455     if (a->diag) PetscFree(a->diag);
456     if (a->ilen) PetscFree(a->ilen);
457     if (a->imax) PetscFree(a->imax);
458     if (a->solve_work) PetscFree(a->solve_work);
459     PetscFree(a);
460     PetscMemcpy(A,C,sizeof(struct _Mat));
461     PetscHeaderDestroy(C);
462   }
463   return 0;
464 }
465 
466 
467 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
468 {
469   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
470   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
471   int        m = a->m,*ip, N, *ailen = a->ilen,shift = a->indexshift;
472   int        bs = a->bs,  mbs = a->mbs, bs2 = bs*bs;
473   Scalar     *aa = a->a, *ap;
474 
475   if (mode == FLUSH_ASSEMBLY) return 0;
476 
477   for ( i=1; i<mbs; i++ ) {
478     /* move each row back by the amount of empty slots (fshift) before it*/
479     fshift += imax[i-1] - ailen[i-1];
480     if (fshift) {
481       ip = aj + ai[i] + shift; ap = aa + bs2*ai[i] + shift;
482       N = ailen[i];
483       for ( j=0; j<N; j++ ) {
484         ip[j-fshift] = ip[j];
485         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
486       }
487     }
488     ai[i] = ai[i-1] + ailen[i-1];
489   }
490   if (mbs) {
491     fshift += imax[mbs-1] - ailen[mbs-1];
492     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
493   }
494   /* reset ilen and imax for each row */
495   for ( i=0; i<mbs; i++ ) {
496     ailen[i] = imax[i] = ai[i+1] - ai[i];
497   }
498   a->nz = ai[mbs] + shift;
499 
500   /* diagonals may have moved, so kill the diagonal pointers */
501   if (fshift && a->diag) {
502     PetscFree(a->diag);
503     PLogObjectMemory(A,-(m+1)*sizeof(int));
504     a->diag = 0;
505   }
506   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneeded storage space %d used %d rows %d\n",
507            fshift,a->nz,m);
508   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n",
509            a->reallocs);
510   return 0;
511 }
512 
513 static int MatZeroEntries_SeqBAIJ(Mat A)
514 {
515   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
516   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
517   return 0;
518 }
519 
520 int MatDestroy_SeqBAIJ(PetscObject obj)
521 {
522   Mat         A  = (Mat) obj;
523   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
524 
525 #if defined(PETSC_LOG)
526   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
527 #endif
528   PetscFree(a->a);
529   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
530   if (a->diag) PetscFree(a->diag);
531   if (a->ilen) PetscFree(a->ilen);
532   if (a->imax) PetscFree(a->imax);
533   if (a->solve_work) PetscFree(a->solve_work);
534   if (a->mult_work) PetscFree(a->mult_work);
535   PetscFree(a);
536   PLogObjectDestroy(A);
537   PetscHeaderDestroy(A);
538   return 0;
539 }
540 
541 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
542 {
543   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
544   if      (op == ROW_ORIENTED)              a->roworiented = 1;
545   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
546   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
547   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
548   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
549   else if (op == ROWS_SORTED ||
550            op == SYMMETRIC_MATRIX ||
551            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
552            op == YES_NEW_DIAGONALS)
553     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
554   else if (op == NO_NEW_DIAGONALS)
555     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
556   else
557     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
558   return 0;
559 }
560 
561 
562 /* -------------------------------------------------------*/
563 /* Should check that shapes of vectors and matrices match */
564 /* -------------------------------------------------------*/
565 #include "pinclude/plapack.h"
566 
567 static int MatMultAdd_SeqBAIJ_Private(Mat A,Vec xx,Vec yy,Vec zz)
568 {
569   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
570   Scalar          *xg,*y,*zg;
571   register Scalar *x,*z,*v,sum,*xb,sum1,sum2,sum3,sum4,sum5;
572   register Scalar x1,x2,x3,x4,x5;
573   int             mbs=a->mbs,m=a->m,i,*idx,*ii;
574   int             bs=a->bs,j,n,bs2=bs*bs,ierr;
575 
576   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
577   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
578 
579   if (yy==PETSC_NULL) PetscMemzero(z,m*sizeof(Scalar)); /* MatMult() */
580   else if (zz!=yy){
581     ierr = VecGetArray(yy,&y); CHKERRQ(ierr);
582     PetscMemcpy(z,y,m*sizeof(Scalar));
583     ierr = VecRestoreArray(yy,&y); CHKERRQ(ierr);
584   }
585 
586   idx   = a->j;
587   v     = a->a;
588   ii    = a->i;
589 
590   switch (bs) {
591   case 1:
592     for ( i=0; i<mbs; i++ ) {
593       n    = ii[1] - ii[0]; ii++;
594       sum  = 0.0;
595       while (n--) sum += *v++ * x[*idx++];
596       z[i] += sum;
597     }
598     break;
599   case 2:
600     for ( i=0; i<mbs; i++ ) {
601       n  = ii[1] - ii[0]; ii++;
602       sum1 = 0.0; sum2 = 0.0;
603       for ( j=0; j<n; j++ ) {
604         xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
605         sum1 += v[0]*x1 + v[2]*x2;
606         sum2 += v[1]*x1 + v[3]*x2;
607         v += 4;
608       }
609       z[0] += sum1; z[1] += sum2;
610       z += 2;
611     }
612     break;
613   case 3:
614     for ( i=0; i<mbs; i++ ) {
615       n  = ii[1] - ii[0]; ii++;
616       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
617       for ( j=0; j<n; j++ ) {
618         xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
619         sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
620         sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
621         sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
622         v += 9;
623       }
624       z[0] += sum1; z[1] += sum2; z[2] += sum3;
625       z += 3;
626     }
627     break;
628   case 4:
629     for ( i=0; i<mbs; i++ ) {
630       n  = ii[1] - ii[0]; ii++;
631       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
632       for ( j=0; j<n; j++ ) {
633         xb = x + 4*(*idx++);
634         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
635         sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
636         sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
637         sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
638         sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
639         v += 16;
640       }
641       z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4;
642       z += 4;
643     }
644     break;
645   case 5:
646     for ( i=0; i<mbs; i++ ) {
647       n  = ii[1] - ii[0]; ii++;
648       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
649       for ( j=0; j<n; j++ ) {
650         xb = x + 5*(*idx++);
651         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
652         sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
653         sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
654         sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
655         sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
656         sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
657         v += 25;
658       }
659       z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4; z[4] += sum5;
660       z += 5;
661     }
662     break;
663     /* block sizes larger then 5 by 5 are handled by BLAS */
664   default: {
665       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
666       if (!a->mult_work) {
667         k = max(a->m,a->n);
668         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
669         CHKPTRQ(a->mult_work);
670       }
671       work = a->mult_work;
672       for ( i=0; i<mbs; i++ ) {
673         n     = ii[1] - ii[0]; ii++;
674         ncols = n*bs;
675         workt = work;
676         for ( j=0; j<n; j++ ) {
677           xb = x + bs*(*idx++);
678           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
679           workt += bs;
680         }
681         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
682         v += n*bs2;
683         z += bs;
684       }
685     }
686   }
687   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
688   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
689   return 0;
690 }
691 
692 static int MatMult_SeqBAIJ(Mat A,Vec xx, Vec yy)
693 {
694   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
695   int         ierr,bs=a->bs,bs2=bs*bs;
696 
697   ierr = MatMultAdd_SeqBAIJ_Private(A,xx,PETSC_NULL,yy); CHKERRQ(ierr);
698   PLogFlops(2*bs2*(a->nz)-a->m);
699   return 0;
700 }
701 
702 static int MatMultAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
703 {
704   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
705   int         ierr,bs=a->bs,bs2=bs*bs;
706 
707   ierr = MatMultAdd_SeqBAIJ_Private(A,xx,yy,zz); CHKERRQ(ierr);
708   PLogFlops(2*bs2*(a->nz));
709   return 0;
710 }
711 
712 static int MatMultTransAdd_SeqBAIJ_Private(Mat A,Vec xx,Vec yy,Vec zz)
713 {
714   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
715   Scalar          *xg,*y,*zg,*zb;
716   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
717   int             mbs=a->mbs,N=a->n,i,*idx,*ii,*ai=a->i,rval;
718   int             bs=a->bs,j,n,bs2=bs*bs,*ib,ierr;
719 
720 
721   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
722   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
723 
724   if (yy==PETSC_NULL) PetscMemzero(z,N*sizeof(Scalar)); /* MatMultTrans() */
725   else if (zz!=yy){
726     ierr = VecGetArray(yy,&y); CHKERRQ(ierr);
727     PetscMemcpy(z,y,N*sizeof(Scalar));
728     ierr = VecRestoreArray(yy,&y); CHKERRQ(ierr);
729   }
730 
731   idx   = a->j;
732   v     = a->a;
733   ii    = a->i;
734 
735   switch (bs) {
736   case 1:
737     for ( i=0; i<mbs; i++ ) {
738       n  = ii[1] - ii[0]; ii++;
739       xb = x + i; x1 = xb[0];
740       ib = idx + ai[i];
741       for ( j=0; j<n; j++ ) {
742         z[ib[j]] += *v++ * x1;
743       }
744     }
745     break;
746   case 2:
747     for ( i=0; i<mbs; i++ ) {
748       n  = ii[1] - ii[0]; ii++;
749       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
750       ib = idx + ai[i];
751       for ( j=0; j<n; j++ ) {
752         rval = ib[j]*2;
753         z[rval++] += v[0]*x1 + v[1]*x2;
754         z[rval++] += v[2]*x1 + v[3]*x2;
755         v += 4;
756       }
757     }
758     break;
759   case 3:
760     for ( i=0; i<mbs; i++ ) {
761       n  = ii[1] - ii[0]; ii++;
762       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
763       ib = idx + ai[i];
764       for ( j=0; j<n; j++ ) {
765         rval = ib[j]*3;
766         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
767         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
768         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
769         v += 9;
770       }
771     }
772     break;
773   case 4:
774     for ( i=0; i<mbs; i++ ) {
775       n  = ii[1] - ii[0]; ii++;
776       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
777       ib = idx + ai[i];
778       for ( j=0; j<n; j++ ) {
779         rval = ib[j]*4;
780         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
781         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
782         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
783         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
784         v += 16;
785       }
786     }
787     break;
788   case 5:
789     for ( i=0; i<mbs; i++ ) {
790       n  = ii[1] - ii[0]; ii++;
791       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
792       x4 = xb[3];   x5 = xb[4];
793       ib = idx + ai[i];
794       for ( j=0; j<n; j++ ) {
795         rval = ib[j]*5;
796         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
797         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
798         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
799         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
800         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
801         v += 25;
802       }
803     }
804     break;
805       /* block sizes larger then 5 by 5 are handled by BLAS */
806     default: {
807       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
808       if (!a->mult_work) {
809         k = max(a->m,a->n);
810         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
811         CHKPTRQ(a->mult_work);
812       }
813       work = a->mult_work;
814       for ( i=0; i<mbs; i++ ) {
815         n     = ii[1] - ii[0]; ii++;
816         ncols = n*bs;
817         PetscMemzero(work,ncols*sizeof(Scalar));
818         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
819         v += n*bs2;
820         x += bs;
821         workt = work;
822         for ( j=0; j<n; j++ ) {
823           zb = z + bs*(*idx++);
824           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
825           workt += bs;
826         }
827       }
828     }
829   }
830   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
831   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
832   return 0;
833 }
834 
835 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx, Vec yy)
836 {
837   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
838   int         ierr,bs=a->bs,bs2=bs*bs;
839 
840   ierr = MatMultTransAdd_SeqBAIJ_Private(A,xx,PETSC_NULL,yy); CHKERRQ(ierr);
841   PLogFlops(2*bs2*(a->nz)-a->n);
842   return 0;
843 }
844 
845 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
846 {
847   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
848   int         ierr,bs=a->bs,bs2=bs*bs;
849 
850   ierr = MatMultTransAdd_SeqBAIJ_Private(A,xx,yy,zz); CHKERRQ(ierr);
851   PLogFlops(2*bs2*(a->nz));
852   return 0;
853 }
854 
855 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
856 {
857   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
858   if (nz)  *nz  = a->bs*a->bs*a->nz;
859   if (nza) *nza = a->maxnz;
860   if (mem) *mem = (int)A->mem;
861   return 0;
862 }
863 
864 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
865 {
866   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
867 
868   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
869 
870   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
871   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
872       (a->nz != b->nz)||(a->indexshift != b->indexshift)) {
873     *flg = PETSC_FALSE; return 0;
874   }
875 
876   /* if the a->i are the same */
877   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
878     *flg = PETSC_FALSE; return 0;
879   }
880 
881   /* if a->j are the same */
882   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
883     *flg = PETSC_FALSE; return 0;
884   }
885 
886   /* if a->a are the same */
887   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
888     *flg = PETSC_FALSE; return 0;
889   }
890   *flg = PETSC_TRUE;
891   return 0;
892 
893 }
894 
895 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
896 {
897   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
898   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
899   Scalar      *x, zero = 0.0,*aa,*aa_j;
900 
901   bs   = a->bs;
902   aa   = a->a;
903   ai   = a->i;
904   aj   = a->j;
905   ambs = a->mbs;
906   bs2  = bs*bs;
907 
908   VecSet(&zero,v);
909   VecGetArray(v,&x); VecGetLocalSize(v,&n);
910   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
911   for ( i=0; i<ambs; i++ ) {
912     for ( j=ai[i]; j<ai[i+1]; j++ ) {
913       if (aj[j] == i) {
914         row  = i*bs;
915         aa_j = aa+j*bs2;
916         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
917         break;
918       }
919     }
920   }
921   return 0;
922 }
923 
924 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
925 {
926   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
927   Scalar      *l,*r,x,*v,*aa,*li,*ri;
928   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
929 
930   ai  = a->i;
931   aj  = a->j;
932   aa  = a->a;
933   m   = a->m;
934   n   = a->n;
935   bs  = a->bs;
936   mbs = a->mbs;
937   bs2 = bs*bs;
938   if (ll) {
939     VecGetArray(ll,&l); VecGetSize(ll,&lm);
940     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
941     for ( i=0; i<mbs; i++ ) { /* for each block row */
942       M  = ai[i+1] - ai[i];
943       li = l + i*bs;
944       v  = aa + bs2*ai[i];
945       for ( j=0; j<M; j++ ) { /* for each block */
946         for ( k=0; k<bs2; k++ ) {
947           (*v++) *= li[k%bs];
948         }
949       }
950     }
951   }
952 
953   if (rr) {
954     VecGetArray(rr,&r); VecGetSize(rr,&rn);
955     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
956     for ( i=0; i<mbs; i++ ) { /* for each block row */
957       M  = ai[i+1] - ai[i];
958       v  = aa + bs2*ai[i];
959       for ( j=0; j<M; j++ ) { /* for each block */
960         ri = r + bs*aj[ai[i]+j];
961         for ( k=0; k<bs; k++ ) {
962           x = ri[k];
963           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
964         }
965       }
966     }
967   }
968   return 0;
969 }
970 
971 
972 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
973 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
974 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
975 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
976 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
977 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
978 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
979 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
980 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
981 
982 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
983 {
984   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
985   Scalar      *v = a->a;
986   double      sum = 0.0;
987   int         i,bs=a->bs,nz=a->nz,bs2=bs*bs;
988 
989   if (type == NORM_FROBENIUS) {
990     for (i=0; i< bs2*nz; i++ ) {
991 #if defined(PETSC_COMPLEX)
992       sum += real(conj(*v)*(*v)); v++;
993 #else
994       sum += (*v)*(*v); v++;
995 #endif
996     }
997     *norm = sqrt(sum);
998   }
999   else {
1000     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
1001   }
1002   return 0;
1003 }
1004 
1005 /*
1006      note: This can only work for identity for row and col. It would
1007    be good to check this and otherwise generate an error.
1008 */
1009 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1010 {
1011   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1012   Mat         outA;
1013   int         ierr;
1014 
1015   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
1016 
1017   outA          = inA;
1018   inA->factor   = FACTOR_LU;
1019   a->row        = row;
1020   a->col        = col;
1021 
1022   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1023 
1024   if (!a->diag) {
1025     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1026   }
1027   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1028   return 0;
1029 }
1030 
1031 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1032 {
1033   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1034   int         one = 1, totalnz = a->bs*a->bs*a->nz;
1035   BLscal_( &totalnz, alpha, a->a, &one );
1036   PLogFlops(totalnz);
1037   return 0;
1038 }
1039 
1040 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1041 {
1042   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1043   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1044   int        *ai = a->i, *ailen = a->ilen, shift = a->indexshift;
1045   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=bs*bs;
1046   Scalar     *ap, *aa = a->a, zero = 0.0;
1047 
1048   for ( k=0; k<m; k++ ) { /* loop over rows */
1049     row  = im[k]; brow = row/bs;
1050     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
1051     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1052     rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
1053     nrow = ailen[brow];
1054     for ( l=0; l<n; l++ ) { /* loop over columns */
1055       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1056       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1057       col  = in[l] - shift;
1058       bcol = col/bs;
1059       cidx = col%bs;
1060       ridx = row%bs;
1061       high = nrow;
1062       low  = 0; /* assume unsorted */
1063       while (high-low > 5) {
1064         t = (low+high)/2;
1065         if (rp[t] > bcol) high = t;
1066         else             low  = t;
1067       }
1068       for ( i=low; i<high; i++ ) {
1069         if (rp[i] > bcol) break;
1070         if (rp[i] == bcol) {
1071           *v++ = ap[bs2*i+bs*cidx+ridx];
1072           goto finished;
1073         }
1074       }
1075       *v++ = zero;
1076       finished:;
1077     }
1078   }
1079   return 0;
1080 }
1081 
1082 int MatPrintHelp_SeqBAIJ(Mat A)
1083 {
1084   static int called = 0;
1085 
1086   if (called) return 0; else called = 1;
1087   return 0;
1088 }
1089 /* -------------------------------------------------------------------*/
1090 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1091        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1092        MatMult_SeqBAIJ,MatMultAdd_SeqBAIJ,
1093        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1094        MatSolve_SeqBAIJ,0,
1095        0,0,
1096        MatLUFactor_SeqBAIJ,0,
1097        0,
1098        MatTranspose_SeqBAIJ,
1099        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1100        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1101        0,MatAssemblyEnd_SeqBAIJ,
1102        0,
1103        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
1104        MatGetReordering_SeqBAIJ,
1105        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1106        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1107        MatILUFactorSymbolic_SeqBAIJ,0,
1108        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1109        0,0,
1110        MatConvertSameType_SeqBAIJ,0,0,
1111        MatILUFactor_SeqBAIJ,0,0,
1112        0,0,
1113        MatGetValues_SeqBAIJ,0,
1114        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1115        0};
1116 
1117 /*@C
1118    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1119    compressed row) format.  For good matrix assembly performance the
1120    user should preallocate the matrix storage by setting the parameter nz
1121    (or nzz).  By setting these parameters accurately, performance can be
1122    increased by more than a factor of 50.
1123 
1124    Input Parameters:
1125 .  comm - MPI communicator, set to MPI_COMM_SELF
1126 .  bs - size of block
1127 .  m - number of rows
1128 .  n - number of columns
1129 .  nz - number of block nonzeros per block row (same for all rows)
1130 .  nzz - number of block nonzeros per block row or PETSC_NULL
1131          (possibly different for each row)
1132 
1133    Output Parameter:
1134 .  A - the matrix
1135 
1136    Notes:
1137    The block AIJ format is fully compatible with standard Fortran 77
1138    storage.  That is, the stored row and column indices can begin at
1139    either one (as in Fortran) or zero.  See the users' manual for details.
1140 
1141    Specify the preallocated storage with either nz or nnz (not both).
1142    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1143    allocation.  For additional details, see the users manual chapter on
1144    matrices and the file $(PETSC_DIR)/Performance.
1145 
1146 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1147 @*/
1148 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1149 {
1150   Mat         B;
1151   Mat_SeqBAIJ *b;
1152   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1153 
1154   if (mbs*bs!=m || nbs*bs!=n)
1155     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1156 
1157   *A = 0;
1158   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1159   PLogObjectCreate(B);
1160   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1161   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1162   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1163   switch (bs) {
1164     case 1:
1165       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1166       break;
1167     case 2:
1168       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1169       break;
1170     case 3:
1171       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1172       break;
1173     case 4:
1174       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1175       break;
1176     case 5:
1177       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1178       break;
1179   }
1180   B->destroy          = MatDestroy_SeqBAIJ;
1181   B->view             = MatView_SeqBAIJ;
1182   B->factor           = 0;
1183   B->lupivotthreshold = 1.0;
1184   b->row              = 0;
1185   b->col              = 0;
1186   b->reallocs         = 0;
1187 
1188   b->m       = m; B->m = m; B->M = m;
1189   b->n       = n; B->n = n; B->N = n;
1190   b->mbs     = mbs;
1191   b->nbs     = nbs;
1192   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1193   if (nnz == PETSC_NULL) {
1194     if (nz == PETSC_DEFAULT) nz = 5;
1195     else if (nz <= 0)        nz = 1;
1196     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1197     nz = nz*mbs;
1198   }
1199   else {
1200     nz = 0;
1201     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1202   }
1203 
1204   /* allocate the matrix space */
1205   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1206   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1207   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1208   b->j  = (int *) (b->a + nz*bs2);
1209   PetscMemzero(b->j,nz*sizeof(int));
1210   b->i  = b->j + nz;
1211   b->singlemalloc = 1;
1212 
1213   b->i[0] = 0;
1214   for (i=1; i<mbs+1; i++) {
1215     b->i[i] = b->i[i-1] + b->imax[i-1];
1216   }
1217 
1218   /* b->ilen will count nonzeros in each block row so far. */
1219   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1220   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1221   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1222 
1223   b->bs               = bs;
1224   b->mbs              = mbs;
1225   b->nz               = 0;
1226   b->maxnz            = nz;
1227   b->sorted           = 0;
1228   b->roworiented      = 1;
1229   b->nonew            = 0;
1230   b->diag             = 0;
1231   b->solve_work       = 0;
1232   b->mult_work        = 0;
1233   b->spptr            = 0;
1234   b->indexshift       = 0;
1235   *A = B;
1236   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1237   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1238   return 0;
1239 }
1240 
1241 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1242 {
1243   Mat         C;
1244   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1245   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz,bs2 = bs*bs;
1246 
1247   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1248 
1249   *B = 0;
1250   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1251   PLogObjectCreate(C);
1252   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1253   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1254   C->destroy    = MatDestroy_SeqBAIJ;
1255   C->view       = MatView_SeqBAIJ;
1256   C->factor     = A->factor;
1257   c->row        = 0;
1258   c->col        = 0;
1259   C->assembled  = PETSC_TRUE;
1260 
1261   c->m = C->m   = a->m;
1262   c->n = C->n   = a->n;
1263   C->M          = a->m;
1264   C->N          = a->n;
1265 
1266   c->bs         = a->bs;
1267   c->mbs        = a->mbs;
1268   c->nbs        = a->nbs;
1269   c->indexshift = 0;
1270 
1271   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1272   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1273   for ( i=0; i<mbs; i++ ) {
1274     c->imax[i] = a->imax[i];
1275     c->ilen[i] = a->ilen[i];
1276   }
1277 
1278   /* allocate the matrix space */
1279   c->singlemalloc = 1;
1280   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1281   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1282   c->j  = (int *) (c->a + nz*bs2);
1283   c->i  = c->j + nz;
1284   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1285   if (mbs > 0) {
1286     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1287     if (cpvalues == COPY_VALUES) {
1288       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1289     }
1290   }
1291 
1292   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1293   c->sorted      = a->sorted;
1294   c->roworiented = a->roworiented;
1295   c->nonew       = a->nonew;
1296 
1297   if (a->diag) {
1298     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1299     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1300     for ( i=0; i<mbs; i++ ) {
1301       c->diag[i] = a->diag[i];
1302     }
1303   }
1304   else c->diag          = 0;
1305   c->nz                 = a->nz;
1306   c->maxnz              = a->maxnz;
1307   c->solve_work         = 0;
1308   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1309   c->mult_work          = 0;
1310   *B = C;
1311   return 0;
1312 }
1313 
1314 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1315 {
1316   Mat_SeqBAIJ  *a;
1317   Mat          B;
1318   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1319   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1320   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1321   int          *masked, nmask,tmp,ishift,bs2;
1322   Scalar       *aa;
1323   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1324 
1325   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1326   bs2  = bs*bs;
1327 
1328   MPI_Comm_size(comm,&size);
1329   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1330   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1331   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1332   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1333   M = header[1]; N = header[2]; nz = header[3];
1334 
1335   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1336 
1337   /*
1338      This code adds extra rows to make sure the number of rows is
1339     divisible by the blocksize
1340   */
1341   mbs        = M/bs;
1342   extra_rows = bs - M + bs*(mbs);
1343   if (extra_rows == bs) extra_rows = 0;
1344   else                  mbs++;
1345   if (extra_rows) {
1346     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
1347   }
1348 
1349   /* read in row lengths */
1350   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1351   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1352   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1353 
1354   /* read in column indices */
1355   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1356   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1357   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1358 
1359   /* loop over row lengths determining block row lengths */
1360   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1361   PetscMemzero(browlengths,mbs*sizeof(int));
1362   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1363   PetscMemzero(mask,mbs*sizeof(int));
1364   masked = mask + mbs;
1365   rowcount = 0; nzcount = 0;
1366   for ( i=0; i<mbs; i++ ) {
1367     nmask = 0;
1368     for ( j=0; j<bs; j++ ) {
1369       kmax = rowlengths[rowcount];
1370       for ( k=0; k<kmax; k++ ) {
1371         tmp = jj[nzcount++]/bs;
1372         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1373       }
1374       rowcount++;
1375     }
1376     browlengths[i] += nmask;
1377     /* zero out the mask elements we set */
1378     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1379   }
1380 
1381   /* create our matrix */
1382   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1383          CHKERRQ(ierr);
1384   B = *A;
1385   a = (Mat_SeqBAIJ *) B->data;
1386 
1387   /* set matrix "i" values */
1388   a->i[0] = 0;
1389   for ( i=1; i<= mbs; i++ ) {
1390     a->i[i]      = a->i[i-1] + browlengths[i-1];
1391     a->ilen[i-1] = browlengths[i-1];
1392   }
1393   a->indexshift = 0;
1394   a->nz         = 0;
1395   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1396 
1397   /* read in nonzero values */
1398   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1399   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
1400   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1401 
1402   /* set "a" and "j" values into matrix */
1403   nzcount = 0; jcount = 0;
1404   for ( i=0; i<mbs; i++ ) {
1405     nzcountb = nzcount;
1406     nmask    = 0;
1407     for ( j=0; j<bs; j++ ) {
1408       kmax = rowlengths[i*bs+j];
1409       for ( k=0; k<kmax; k++ ) {
1410         tmp = jj[nzcount++]/bs;
1411 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1412       }
1413       rowcount++;
1414     }
1415     /* sort the masked values */
1416     PetscSortInt(nmask,masked);
1417 
1418     /* set "j" values into matrix */
1419     maskcount = 1;
1420     for ( j=0; j<nmask; j++ ) {
1421       a->j[jcount++]  = masked[j];
1422       mask[masked[j]] = maskcount++;
1423     }
1424     /* set "a" values into matrix */
1425     ishift = bs2*a->i[i];
1426     for ( j=0; j<bs; j++ ) {
1427       kmax = rowlengths[i*bs+j];
1428       for ( k=0; k<kmax; k++ ) {
1429         tmp    = jj[nzcountb]/bs ;
1430         block  = mask[tmp] - 1;
1431         point  = jj[nzcountb] - bs*tmp;
1432         idx    = ishift + bs2*block + j + bs*point;
1433         a->a[idx] = aa[nzcountb++];
1434       }
1435     }
1436     /* zero out the mask elements we set */
1437     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1438   }
1439   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1440 
1441   PetscFree(rowlengths);
1442   PetscFree(browlengths);
1443   PetscFree(aa);
1444   PetscFree(jj);
1445   PetscFree(mask);
1446 
1447   B->assembled = PETSC_TRUE;
1448 
1449   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1450   if (flg) {
1451     Viewer tviewer;
1452     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1453     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
1454     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1455     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1456   }
1457   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1458   if (flg) {
1459     Viewer tviewer;
1460     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1461     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
1462     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1463     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1464   }
1465   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1466   if (flg) {
1467     Viewer tviewer;
1468     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1469     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1470     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1471   }
1472   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1473   if (flg) {
1474     Viewer tviewer;
1475     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1476     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
1477     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1478     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1479   }
1480   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1481   if (flg) {
1482     Viewer tviewer;
1483     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
1484     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
1485     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
1486     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1487   }
1488   return 0;
1489 }
1490 
1491 
1492 
1493