xref: /petsc/src/mat/impls/baij/seq/baij.c (revision bb42667fc153f7d704b906411b30c2fa71bf0e13)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.32 1996/04/09 02:23:33 curfman Exp balay $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 #define  max(a,b) (a>b ? a:b)
15 extern   int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = a->indexshift;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45     CHKERRQ(ierr);
46     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
47     PetscFree(ia); PetscFree(ja);
48   } else {
49     if (ishift == oshift) {
50       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51     }
52     else if (ishift == -1) {
53       /* temporarily subtract 1 from i and j indices */
54       int nz = a->i[a->n] - 1;
55       for ( i=0; i<nz; i++ ) a->j[i]--;
56       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58       for ( i=0; i<nz; i++ ) a->j[i]++;
59       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60     } else {
61       /* temporarily add 1 to i and j indices */
62       int nz = a->i[a->n] - 1;
63       for ( i=0; i<nz; i++ ) a->j[i]++;
64       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66       for ( i=0; i<nz; i++ ) a->j[i]--;
67       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68     }
69   }
70   return 0;
71 }
72 
73 /*
74      Adds diagonal pointers to sparse matrix structure.
75 */
76 
77 int MatMarkDiag_SeqBAIJ(Mat A)
78 {
79   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80   int         i,j, *diag, m = a->mbs;
81 
82   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83   PLogObjectMemory(A,(m+1)*sizeof(int));
84   for ( i=0; i<m; i++ ) {
85     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86       if (a->j[j] == i) {
87         diag[i] = j;
88         break;
89       }
90     }
91   }
92   a->diag = diag;
93   return 0;
94 }
95 
96 #include "draw.h"
97 #include "pinclude/pviewer.h"
98 #include "sys.h"
99 
100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
101 {
102   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
103   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=bs*bs;
104   Scalar      *aa;
105 
106   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
107   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
108   col_lens[0] = MAT_COOKIE;
109   col_lens[1] = a->m;
110   col_lens[2] = a->n;
111   col_lens[3] = a->nz*bs2;
112 
113   /* store lengths of each row and write (including header) to file */
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118     }
119   }
120   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
121   PetscFree(col_lens);
122 
123   /* store column indices (zero start index) */
124   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
125   count = 0;
126   for ( i=0; i<a->mbs; i++ ) {
127     for ( j=0; j<bs; j++ ) {
128       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129         for ( l=0; l<bs; l++ ) {
130           jj[count++] = bs*a->j[k] + l;
131         }
132       }
133     }
134   }
135   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
136   PetscFree(jj);
137 
138   /* store nonzero values */
139   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
140   count = 0;
141   for ( i=0; i<a->mbs; i++ ) {
142     for ( j=0; j<bs; j++ ) {
143       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144         for ( l=0; l<bs; l++ ) {
145           aa[count++] = a->a[bs2*k + l*bs + j];
146         }
147       }
148     }
149   }
150   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
151   PetscFree(aa);
152   return 0;
153 }
154 
155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
156 {
157   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
158   int         ierr, i,j,format,bs = a->bs,k,l,bs2=bs*bs;
159   FILE        *fd;
160   char        *outputname;
161 
162   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
163   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
164   ierr = ViewerGetFormat(viewer,&format);
165   if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) {
166     fprintf(fd,"  block size is %d\n",bs);
167   }
168   else if (format == ASCII_FORMAT_MATLAB) {
169     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
170   }
171   else if (format == ASCII_FORMAT_COMMON) {
172     for ( i=0; i<a->mbs; i++ ) {
173       for ( j=0; j<bs; j++ ) {
174         fprintf(fd,"row %d:",i*bs+j);
175         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
176           for ( l=0; l<bs; l++ ) {
177 #if defined(PETSC_COMPLEX)
178           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
179             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
180               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
181           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
182             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
183 #else
184           if (a->a[bs2*k + l*bs + j] != 0.0)
185             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
186 #endif
187           }
188         }
189         fprintf(fd,"\n");
190       }
191     }
192   }
193   else {
194     for ( i=0; i<a->mbs; i++ ) {
195       for ( j=0; j<bs; j++ ) {
196         fprintf(fd,"row %d:",i*bs+j);
197         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
198           for ( l=0; l<bs; l++ ) {
199 #if defined(PETSC_COMPLEX)
200           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
201             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
202               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
203           }
204           else {
205             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
206           }
207 #else
208             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
209 #endif
210           }
211         }
212         fprintf(fd,"\n");
213       }
214     }
215   }
216   fflush(fd);
217   return 0;
218 }
219 
220 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
221 {
222   Mat         A = (Mat) obj;
223   ViewerType  vtype;
224   int         ierr;
225 
226   if (!viewer) {
227     viewer = STDOUT_VIEWER_SELF;
228   }
229 
230   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
231   if (vtype == MATLAB_VIEWER) {
232     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
233   }
234   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
235     return MatView_SeqBAIJ_ASCII(A,viewer);
236   }
237   else if (vtype == BINARY_FILE_VIEWER) {
238     return MatView_SeqBAIJ_Binary(A,viewer);
239   }
240   else if (vtype == DRAW_VIEWER) {
241     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
242   }
243   return 0;
244 }
245 
246 #define CHUNKSIZE  10
247 
248 /* This version has row oriented v  */
249 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
250 {
251   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
252   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
253   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
254   int         *aj=a->j,nonew=a->nonew,shift=a->indexshift,bs=a->bs,brow,bcol;
255   int          ridx,cidx,bs2=bs*bs;
256   Scalar      *ap,value,*aa=a->a,*bap;
257 
258   for ( k=0; k<m; k++ ) { /* loop over added rows */
259     row  = im[k]; brow = row/bs;
260     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
261     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
262     rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
263     rmax = imax[brow]; nrow = ailen[brow];
264     low = 0;
265     for ( l=0; l<n; l++ ) { /* loop over added columns */
266       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
267       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
268       col = in[l] - shift; bcol = col/bs;
269       ridx = row % bs; cidx = col % bs;
270       if (roworiented) {
271         value = *v++;
272       }
273       else {
274         value = v[k + l*m];
275       }
276       if (!sorted) low = 0; high = nrow;
277       while (high-low > 5) {
278         t = (low+high)/2;
279         if (rp[t] > bcol) high = t;
280         else             low  = t;
281       }
282       for ( i=low; i<high; i++ ) {
283         if (rp[i] > bcol) break;
284         if (rp[i] == bcol) {
285           bap  = ap +  bs2*i + bs*cidx + ridx;
286           if (is == ADD_VALUES) *bap += value;
287           else                  *bap  = value;
288           goto noinsert;
289         }
290       }
291       if (nonew) goto noinsert;
292       if (nrow >= rmax) {
293         /* there is no extra room in row, therefore enlarge */
294         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
295         Scalar *new_a;
296 
297         /* malloc new storage space */
298         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
299         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
300         new_j   = (int *) (new_a + bs2*new_nz);
301         new_i   = new_j + new_nz;
302 
303         /* copy over old data into new slots */
304         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
305         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
306         PetscMemcpy(new_j,aj,(ai[brow]+nrow+shift)*sizeof(int));
307         len = (new_nz - CHUNKSIZE - ai[brow] - nrow - shift);
308         PetscMemcpy(new_j+ai[brow]+shift+nrow+CHUNKSIZE,aj+ai[brow]+shift+nrow,
309                                                            len*sizeof(int));
310         PetscMemcpy(new_a,aa,(ai[brow]+nrow+shift)*bs2*sizeof(Scalar));
311         PetscMemzero(new_a+bs2*(ai[brow]+shift+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
312         PetscMemcpy(new_a+bs2*(ai[brow]+shift+nrow+CHUNKSIZE),
313                     aa+bs2*(ai[brow]+shift+nrow),bs2*len*sizeof(Scalar));
314         /* free up old matrix storage */
315         PetscFree(a->a);
316         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
317         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
318         a->singlemalloc = 1;
319 
320         rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
321         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
322         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
323         a->maxnz += CHUNKSIZE;
324         a->reallocs++;
325         a->nz++;
326       }
327       N = nrow++ - 1;
328       /* shift up all the later entries in this row */
329       for ( ii=N; ii>=i; ii-- ) {
330         rp[ii+1] = rp[ii];
331          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
332       }
333       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
334       rp[i] = bcol;
335       ap[bs2*i + bs*cidx + ridx] = value;
336       noinsert:;
337       low = i;
338     }
339     ailen[brow] = nrow;
340   }
341   return 0;
342 }
343 
344 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
345 {
346   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
347   *m = a->m; *n = a->n;
348   return 0;
349 }
350 
351 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
352 {
353   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
354   *m = 0; *n = a->m;
355   return 0;
356 }
357 
358 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
359 {
360   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
361   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
362   Scalar      *aa,*v_i,*aa_i;
363 
364   bs  = a->bs;
365   ai  = a->i;
366   aj  = a->j;
367   aa  = a->a;
368   bs2 = bs*bs;
369 
370   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
371 
372   bn  = row/bs;   /* Block number */
373   bp  = row % bs; /* Block Position */
374   M   = ai[bn+1] - ai[bn];
375   *nz = bs*M;
376 
377   if (v) {
378     *v = 0;
379     if (*nz) {
380       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
381       for ( i=0; i<M; i++ ) { /* for each block in the block row */
382         v_i  = *v + i*bs;
383         aa_i = aa + bs2*(ai[bn] + i);
384         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
385       }
386     }
387   }
388 
389   if (idx) {
390     *idx = 0;
391     if (*nz) {
392       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
393       for ( i=0; i<M; i++ ) { /* for each block in the block row */
394         idx_i = *idx + i*bs;
395         itmp  = bs*aj[ai[bn] + i];
396         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
397       }
398     }
399   }
400   return 0;
401 }
402 
403 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
404 {
405   if (idx) {if (*idx) PetscFree(*idx);}
406   if (v)   {if (*v)   PetscFree(*v);}
407   return 0;
408 }
409 
410 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
411 {
412   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
413   Mat         C;
414   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
415   int         shift=a->indexshift,*rows,*cols,bs2=bs*bs;
416   Scalar      *array=a->a;
417 
418   if (B==PETSC_NULL && mbs!=nbs)
419     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
420   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
421   PetscMemzero(col,(1+nbs)*sizeof(int));
422   if (shift) {
423     for ( i=0; i<ai[mbs]-1; i++ ) aj[i] -= 1;
424   }
425   for ( i=0; i<ai[mbs]+shift; i++ ) col[aj[i]] += 1;
426   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
427   PetscFree(col);
428   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
429   cols = rows + bs;
430   for ( i=0; i<mbs; i++ ) {
431     cols[0] = i*bs;
432     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
433     len = ai[i+1] - ai[i];
434     for ( j=0; j<len; j++ ) {
435       rows[0] = (*aj++)*bs;
436       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
437       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
438       array += bs2;
439     }
440   }
441   PetscFree(rows);
442   if (shift) {
443     for ( i=0; i<ai[mbs]-1; i++ ) aj[i] += 1;
444   }
445 
446   ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
447   ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
448 
449   if (B != PETSC_NULL) {
450     *B = C;
451   } else {
452     /* This isn't really an in-place transpose */
453     PetscFree(a->a);
454     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
455     if (a->diag) PetscFree(a->diag);
456     if (a->ilen) PetscFree(a->ilen);
457     if (a->imax) PetscFree(a->imax);
458     if (a->solve_work) PetscFree(a->solve_work);
459     PetscFree(a);
460     PetscMemcpy(A,C,sizeof(struct _Mat));
461     PetscHeaderDestroy(C);
462   }
463   return 0;
464 }
465 
466 
467 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
468 {
469   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
470   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
471   int        m = a->m,*ip, N, *ailen = a->ilen,shift = a->indexshift;
472   int        bs = a->bs,  mbs = a->mbs, bs2 = bs*bs;
473   Scalar     *aa = a->a, *ap;
474 
475   if (mode == FLUSH_ASSEMBLY) return 0;
476 
477   for ( i=1; i<mbs; i++ ) {
478     /* move each row back by the amount of empty slots (fshift) before it*/
479     fshift += imax[i-1] - ailen[i-1];
480     if (fshift) {
481       ip = aj + ai[i] + shift; ap = aa + bs2*ai[i] + shift;
482       N = ailen[i];
483       for ( j=0; j<N; j++ ) {
484         ip[j-fshift] = ip[j];
485         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
486       }
487     }
488     ai[i] = ai[i-1] + ailen[i-1];
489   }
490   if (mbs) {
491     fshift += imax[mbs-1] - ailen[mbs-1];
492     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
493   }
494   /* reset ilen and imax for each row */
495   for ( i=0; i<mbs; i++ ) {
496     ailen[i] = imax[i] = ai[i+1] - ai[i];
497   }
498   a->nz = ai[mbs] + shift;
499 
500   /* diagonals may have moved, so kill the diagonal pointers */
501   if (fshift && a->diag) {
502     PetscFree(a->diag);
503     PLogObjectMemory(A,-(m+1)*sizeof(int));
504     a->diag = 0;
505   }
506   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneeded storage space %d used %d rows %d\n",
507            fshift,a->nz,m);
508   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n",
509            a->reallocs);
510   return 0;
511 }
512 
513 static int MatZeroEntries_SeqBAIJ(Mat A)
514 {
515   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
516   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
517   return 0;
518 }
519 
520 int MatDestroy_SeqBAIJ(PetscObject obj)
521 {
522   Mat         A  = (Mat) obj;
523   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
524 
525 #if defined(PETSC_LOG)
526   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
527 #endif
528   PetscFree(a->a);
529   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
530   if (a->diag) PetscFree(a->diag);
531   if (a->ilen) PetscFree(a->ilen);
532   if (a->imax) PetscFree(a->imax);
533   if (a->solve_work) PetscFree(a->solve_work);
534   if (a->mult_work) PetscFree(a->mult_work);
535   PetscFree(a);
536   PLogObjectDestroy(A);
537   PetscHeaderDestroy(A);
538   return 0;
539 }
540 
541 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
542 {
543   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
544   if      (op == ROW_ORIENTED)              a->roworiented = 1;
545   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
546   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
547   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
548   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
549   else if (op == ROWS_SORTED ||
550            op == SYMMETRIC_MATRIX ||
551            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
552            op == YES_NEW_DIAGONALS)
553     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
554   else if (op == NO_NEW_DIAGONALS)
555     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
556   else
557     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
558   return 0;
559 }
560 
561 
562 /* -------------------------------------------------------*/
563 /* Should check that shapes of vectors and matrices match */
564 /* -------------------------------------------------------*/
565 #include "pinclude/plapack.h"
566 
567 static int MatMultAdd_SeqBAIJ_Private(Mat A,Vec xx,Vec yy,Vec zz)
568 {
569   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
570   Scalar          *xg,*y,*zg;
571   register Scalar *x,*z,*v,sum,*xb,sum1,sum2,sum3,sum4,sum5;
572   register Scalar x1,x2,x3,x4,x5;
573   int             mbs=a->mbs,m=a->m,i,*idx,*ii;
574   int             bs=a->bs,j,n,bs2=bs*bs,ierr;
575 
576   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
577   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
578 
579   if (yy==PETSC_NULL) PetscMemzero(z,m*sizeof(Scalar)); /* MatMult() */
580   else if (zz!=yy){
581     ierr = VecGetArray(yy,&y); CHKERRQ(ierr);
582     PetscMemcpy(z,y,m*sizeof(Scalar));
583   }
584 
585   idx   = a->j;
586   v     = a->a;
587   ii    = a->i;
588 
589   switch (bs) {
590   case 1:
591     for ( i=0; i<mbs; i++ ) {
592       n    = ii[1] - ii[0]; ii++;
593       sum  = 0.0;
594       while (n--) sum += *v++ * x[*idx++];
595       z[i] += sum;
596     }
597     break;
598   case 2:
599     for ( i=0; i<mbs; i++ ) {
600       n  = ii[1] - ii[0]; ii++;
601       sum1 = 0.0; sum2 = 0.0;
602       for ( j=0; j<n; j++ ) {
603         xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
604         sum1 += v[0]*x1 + v[2]*x2;
605         sum2 += v[1]*x1 + v[3]*x2;
606         v += 4;
607       }
608       z[0] += sum1; z[1] += sum2;
609       z += 2;
610     }
611     break;
612   case 3:
613     for ( i=0; i<mbs; i++ ) {
614       n  = ii[1] - ii[0]; ii++;
615       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
616       for ( j=0; j<n; j++ ) {
617         xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
618         sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
619         sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
620         sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
621         v += 9;
622       }
623       z[0] += sum1; z[1] += sum2; z[2] += sum3;
624       z += 3;
625     }
626     break;
627   case 4:
628     for ( i=0; i<mbs; i++ ) {
629       n  = ii[1] - ii[0]; ii++;
630       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
631       for ( j=0; j<n; j++ ) {
632         xb = x + 4*(*idx++);
633         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
634         sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
635         sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
636         sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
637         sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
638         v += 16;
639       }
640       z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4;
641       z += 4;
642     }
643     break;
644   case 5:
645     for ( i=0; i<mbs; i++ ) {
646       n  = ii[1] - ii[0]; ii++;
647       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
648       for ( j=0; j<n; j++ ) {
649         xb = x + 5*(*idx++);
650         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
651         sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
652         sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
653         sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
654         sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
655         sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
656         v += 25;
657       }
658       z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4; z[4] += sum5;
659       z += 5;
660     }
661     break;
662     /* block sizes larger then 5 by 5 are handled by BLAS */
663   default: {
664       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
665       if (!a->mult_work) {
666         k = max(a->m,a->n);
667         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
668         CHKPTRQ(a->mult_work);
669       }
670       work = a->mult_work;
671       for ( i=0; i<mbs; i++ ) {
672         n     = ii[1] - ii[0]; ii++;
673         ncols = n*bs;
674         workt = work;
675         for ( j=0; j<n; j++ ) {
676           xb = x + bs*(*idx++);
677           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
678           workt += bs;
679         }
680         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
681         v += n*bs2;
682         z += bs;
683       }
684     }
685   }
686   return 0;
687 }
688 
689 static int MatMult_SeqBAIJ(Mat A,Vec xx, Vec yy)
690 {
691   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
692   int         ierr,bs=a->bs,bs2=bs*bs;
693 
694   ierr = MatMultAdd_SeqBAIJ_Private(A,xx,PETSC_NULL,yy); CHKERRQ(ierr);
695   PLogFlops(2*bs2*(a->nz)-a->m);
696   return 0;
697 }
698 
699 static int MatMultAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
700 {
701   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
702   int         ierr,bs=a->bs,bs2=bs*bs;
703 
704   ierr = MatMultAdd_SeqBAIJ_Private(A,xx,yy,zz); CHKERRQ(ierr);
705   PLogFlops(2*bs2*(a->nz));
706   return 0;
707 }
708 
709 static int MatMultTransAdd_SeqBAIJ_Private(Mat A,Vec xx,Vec yy,Vec zz)
710 {
711   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
712   Scalar          *xg,*y,*zg,*zb;
713   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
714   int             mbs=a->mbs,N=a->n,i,*idx,*ii,*ai=a->i,rval;
715   int             bs=a->bs,j,n,bs2=bs*bs,*ib,ierr;
716 
717 
718   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
719   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
720 
721   if (yy==PETSC_NULL) PetscMemzero(z,N*sizeof(Scalar)); /* MatMultTrans() */
722   else if (zz!=yy){
723     ierr = VecGetArray(yy,&y); CHKERRQ(ierr);
724     PetscMemcpy(z,y,N*sizeof(Scalar));
725   }
726 
727   idx   = a->j;
728   v     = a->a;
729   ii    = a->i;
730 
731   switch (bs) {
732   case 1:
733     for ( i=0; i<mbs; i++ ) {
734       n  = ii[1] - ii[0]; ii++;
735       xb = x + i; x1 = xb[0];
736       ib = idx + ai[i];
737       for ( j=0; j<n; j++ ) {
738         z[ib[j]] += *v++ * x1;
739       }
740     }
741     break;
742   case 2:
743     for ( i=0; i<mbs; i++ ) {
744       n  = ii[1] - ii[0]; ii++;
745       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
746       ib = idx + ai[i];
747       for ( j=0; j<n; j++ ) {
748         rval = ib[j]*2;
749         z[rval++] += v[0]*x1 + v[1]*x2;
750         z[rval++] += v[2]*x1 + v[3]*x2;
751         v += 4;
752       }
753     }
754     break;
755   case 3:
756     for ( i=0; i<mbs; i++ ) {
757       n  = ii[1] - ii[0]; ii++;
758       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
759       ib = idx + ai[i];
760       for ( j=0; j<n; j++ ) {
761         rval = ib[j]*3;
762         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
763         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
764         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
765         v += 9;
766       }
767     }
768     break;
769   case 4:
770     for ( i=0; i<mbs; i++ ) {
771       n  = ii[1] - ii[0]; ii++;
772       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
773       ib = idx + ai[i];
774       for ( j=0; j<n; j++ ) {
775         rval = ib[j]*4;
776         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
777         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
778         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
779         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
780         v += 16;
781       }
782     }
783     break;
784   case 5:
785     for ( i=0; i<mbs; i++ ) {
786       n  = ii[1] - ii[0]; ii++;
787       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
788       x4 = xb[3];   x5 = xb[4];
789       ib = idx + ai[i];
790       for ( j=0; j<n; j++ ) {
791         rval = ib[j]*5;
792         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
793         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
794         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
795         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
796         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
797         v += 25;
798       }
799     }
800     break;
801       /* block sizes larger then 5 by 5 are handled by BLAS */
802     default: {
803       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
804       if (!a->mult_work) {
805         k = max(a->m,a->n);
806         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
807         CHKPTRQ(a->mult_work);
808       }
809       work = a->mult_work;
810       for ( i=0; i<mbs; i++ ) {
811         n     = ii[1] - ii[0]; ii++;
812         ncols = n*bs;
813         PetscMemzero(work,ncols*sizeof(Scalar));
814         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
815         v += n*bs2;
816         x += bs;
817         workt = work;
818         for ( j=0; j<n; j++ ) {
819           zb = z + bs*(*idx++);
820           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
821           workt += bs;
822         }
823       }
824     }
825   }
826   PLogFlops(2*bs2*(a->nz));
827   return 0;
828 }
829 
830 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx, Vec yy)
831 {
832   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
833   int         ierr,bs=a->bs,bs2=bs*bs;
834 
835   ierr = MatMultTransAdd_SeqBAIJ_Private(A,xx,PETSC_NULL,yy); CHKERRQ(ierr);
836   PLogFlops(2*bs2*(a->nz)-a->n);
837   return 0;
838 }
839 
840 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
841 {
842   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
843   int         ierr,bs=a->bs,bs2=bs*bs;
844 
845   ierr = MatMultTransAdd_SeqBAIJ_Private(A,xx,yy,zz); CHKERRQ(ierr);
846   PLogFlops(2*bs2*(a->nz));
847   return 0;
848 }
849 
850 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
851 {
852   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
853   if (nz)  *nz  = a->bs*a->bs*a->nz;
854   if (nza) *nza = a->maxnz;
855   if (mem) *mem = (int)A->mem;
856   return 0;
857 }
858 
859 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
860 {
861   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
862 
863   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
864 
865   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
866   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
867       (a->nz != b->nz)||(a->indexshift != b->indexshift)) {
868     *flg = PETSC_FALSE; return 0;
869   }
870 
871   /* if the a->i are the same */
872   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
873     *flg = PETSC_FALSE; return 0;
874   }
875 
876   /* if a->j are the same */
877   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
878     *flg = PETSC_FALSE; return 0;
879   }
880 
881   /* if a->a are the same */
882   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
883     *flg = PETSC_FALSE; return 0;
884   }
885   *flg = PETSC_TRUE;
886   return 0;
887 
888 }
889 
890 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
891 {
892   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
893   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
894   Scalar      *x, zero = 0.0,*aa,*aa_j;
895 
896   bs   = a->bs;
897   aa   = a->a;
898   ai   = a->i;
899   aj   = a->j;
900   ambs = a->mbs;
901   bs2  = bs*bs;
902 
903   VecSet(&zero,v);
904   VecGetArray(v,&x); VecGetLocalSize(v,&n);
905   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
906   for ( i=0; i<ambs; i++ ) {
907     for ( j=ai[i]; j<ai[i+1]; j++ ) {
908       if (aj[j] == i) {
909         row  = i*bs;
910         aa_j = aa+j*bs2;
911         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
912         break;
913       }
914     }
915   }
916   return 0;
917 }
918 
919 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
920 {
921   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
922   Scalar      *l,*r,x,*v,*aa,*li,*ri;
923   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
924 
925   ai  = a->i;
926   aj  = a->j;
927   aa  = a->a;
928   m   = a->m;
929   n   = a->n;
930   bs  = a->bs;
931   mbs = a->mbs;
932   bs2 = bs*bs;
933   if (ll) {
934     VecGetArray(ll,&l); VecGetSize(ll,&lm);
935     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
936     for ( i=0; i<mbs; i++ ) { /* for each block row */
937       M  = ai[i+1] - ai[i];
938       li = l + i*bs;
939       v  = aa + bs2*ai[i];
940       for ( j=0; j<M; j++ ) { /* for each block */
941         for ( k=0; k<bs2; k++ ) {
942           (*v++) *= li[k%bs];
943         }
944       }
945     }
946   }
947 
948   if (rr) {
949     VecGetArray(rr,&r); VecGetSize(rr,&rn);
950     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
951     for ( i=0; i<mbs; i++ ) { /* for each block row */
952       M  = ai[i+1] - ai[i];
953       v  = aa + bs2*ai[i];
954       for ( j=0; j<M; j++ ) { /* for each block */
955         ri = r + bs*aj[ai[i]+j];
956         for ( k=0; k<bs; k++ ) {
957           x = ri[k];
958           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
959         }
960       }
961     }
962   }
963   return 0;
964 }
965 
966 
967 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
968 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
969 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
970 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
971 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
972 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
973 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
974 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
975 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
976 
977 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
978 {
979   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
980   Scalar      *v = a->a;
981   double      sum = 0.0;
982   int         i;
983 
984   if (type == NORM_FROBENIUS) {
985     for (i=0; i<a->nz; i++ ) {
986 #if defined(PETSC_COMPLEX)
987       sum += real(conj(*v)*(*v)); v++;
988 #else
989       sum += (*v)*(*v); v++;
990 #endif
991     }
992     *norm = sqrt(sum);
993   }
994   else {
995     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
996   }
997   return 0;
998 }
999 
1000 /*
1001      note: This can only work for identity for row and col. It would
1002    be good to check this and otherwise generate an error.
1003 */
1004 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1005 {
1006   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1007   Mat         outA;
1008   int         ierr;
1009 
1010   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
1011 
1012   outA          = inA;
1013   inA->factor   = FACTOR_LU;
1014   a->row        = row;
1015   a->col        = col;
1016 
1017   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1018 
1019   if (!a->diag) {
1020     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1021   }
1022   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1023   return 0;
1024 }
1025 
1026 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1027 {
1028   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1029   int         one = 1, totalnz = a->bs*a->bs*a->nz;
1030   BLscal_( &totalnz, alpha, a->a, &one );
1031   PLogFlops(totalnz);
1032   return 0;
1033 }
1034 
1035 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1036 {
1037   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1038   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1039   int        *ai = a->i, *ailen = a->ilen, shift = a->indexshift;
1040   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=bs*bs;
1041   Scalar     *ap, *aa = a->a, zero = 0.0;
1042 
1043   for ( k=0; k<m; k++ ) { /* loop over rows */
1044     row  = im[k]; brow = row/bs;
1045     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
1046     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1047     rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
1048     nrow = ailen[brow];
1049     for ( l=0; l<n; l++ ) { /* loop over columns */
1050       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1051       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1052       col  = in[l] - shift;
1053       bcol = col/bs;
1054       cidx = col%bs;
1055       ridx = row%bs;
1056       high = nrow;
1057       low  = 0; /* assume unsorted */
1058       while (high-low > 5) {
1059         t = (low+high)/2;
1060         if (rp[t] > bcol) high = t;
1061         else             low  = t;
1062       }
1063       for ( i=low; i<high; i++ ) {
1064         if (rp[i] > bcol) break;
1065         if (rp[i] == bcol) {
1066           *v++ = ap[bs2*i+bs*cidx+ridx];
1067           goto finished;
1068         }
1069       }
1070       *v++ = zero;
1071       finished:;
1072     }
1073   }
1074   return 0;
1075 }
1076 
1077 int MatPrintHelp_SeqBAIJ(Mat A)
1078 {
1079   static int called = 0;
1080 
1081   if (called) return 0; else called = 1;
1082   return 0;
1083 }
1084 /* -------------------------------------------------------------------*/
1085 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1086        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1087        MatMult_SeqBAIJ,MatMultAdd_SeqBAIJ,
1088        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1089        MatSolve_SeqBAIJ,0,
1090        0,0,
1091        MatLUFactor_SeqBAIJ,0,
1092        0,
1093        MatTranspose_SeqBAIJ,
1094        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1095        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1096        0,MatAssemblyEnd_SeqBAIJ,
1097        0,
1098        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
1099        MatGetReordering_SeqBAIJ,
1100        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1101        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1102        MatILUFactorSymbolic_SeqBAIJ,0,
1103        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1104        0,0,
1105        MatConvertSameType_SeqBAIJ,0,0,
1106        MatILUFactor_SeqBAIJ,0,0,
1107        0,0,
1108        MatGetValues_SeqBAIJ,0,
1109        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1110        0};
1111 
1112 /*@C
1113    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1114    compressed row) format.  For good matrix assembly performance the
1115    user should preallocate the matrix storage by setting the parameter nz
1116    (or nzz).  By setting these parameters accurately, performance can be
1117    increased by more than a factor of 50.
1118 
1119    Input Parameters:
1120 .  comm - MPI communicator, set to MPI_COMM_SELF
1121 .  bs - size of block
1122 .  m - number of rows
1123 .  n - number of columns
1124 .  nz - number of block nonzeros per block row (same for all rows)
1125 .  nzz - number of block nonzeros per block row or PETSC_NULL
1126          (possibly different for each row)
1127 
1128    Output Parameter:
1129 .  A - the matrix
1130 
1131    Notes:
1132    The block AIJ format is fully compatible with standard Fortran 77
1133    storage.  That is, the stored row and column indices can begin at
1134    either one (as in Fortran) or zero.  See the users' manual for details.
1135 
1136    Specify the preallocated storage with either nz or nnz (not both).
1137    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1138    allocation.  For additional details, see the users manual chapter on
1139    matrices and the file $(PETSC_DIR)/Performance.
1140 
1141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1142 @*/
1143 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1144 {
1145   Mat         B;
1146   Mat_SeqBAIJ *b;
1147   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1148 
1149   if (mbs*bs!=m || nbs*bs!=n)
1150     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1151 
1152   *A = 0;
1153   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1154   PLogObjectCreate(B);
1155   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1156   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1157   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1158   switch (bs) {
1159     case 1:
1160       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1161       break;
1162     case 2:
1163       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1164       break;
1165     case 3:
1166       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1167       break;
1168     case 4:
1169       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1170       break;
1171     case 5:
1172       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1173       break;
1174   }
1175   B->destroy          = MatDestroy_SeqBAIJ;
1176   B->view             = MatView_SeqBAIJ;
1177   B->factor           = 0;
1178   B->lupivotthreshold = 1.0;
1179   b->row              = 0;
1180   b->col              = 0;
1181   b->reallocs         = 0;
1182 
1183   b->m       = m; B->m = m; B->M = m;
1184   b->n       = n; B->n = n; B->N = n;
1185   b->mbs     = mbs;
1186   b->nbs     = nbs;
1187   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1188   if (nnz == PETSC_NULL) {
1189     if (nz == PETSC_DEFAULT) nz = 5;
1190     else if (nz <= 0)        nz = 1;
1191     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1192     nz = nz*mbs;
1193   }
1194   else {
1195     nz = 0;
1196     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1197   }
1198 
1199   /* allocate the matrix space */
1200   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1201   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1202   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1203   b->j  = (int *) (b->a + nz*bs2);
1204   PetscMemzero(b->j,nz*sizeof(int));
1205   b->i  = b->j + nz;
1206   b->singlemalloc = 1;
1207 
1208   b->i[0] = 0;
1209   for (i=1; i<mbs+1; i++) {
1210     b->i[i] = b->i[i-1] + b->imax[i-1];
1211   }
1212 
1213   /* b->ilen will count nonzeros in each block row so far. */
1214   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1215   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1216   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1217 
1218   b->bs               = bs;
1219   b->mbs              = mbs;
1220   b->nz               = 0;
1221   b->maxnz            = nz;
1222   b->sorted           = 0;
1223   b->roworiented      = 1;
1224   b->nonew            = 0;
1225   b->diag             = 0;
1226   b->solve_work       = 0;
1227   b->mult_work        = 0;
1228   b->spptr            = 0;
1229   b->indexshift       = 0;
1230   *A = B;
1231   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1232   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1233   return 0;
1234 }
1235 
1236 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1237 {
1238   Mat         C;
1239   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1240   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz,bs2 = bs*bs;
1241 
1242   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1243 
1244   *B = 0;
1245   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1246   PLogObjectCreate(C);
1247   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1248   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1249   C->destroy    = MatDestroy_SeqBAIJ;
1250   C->view       = MatView_SeqBAIJ;
1251   C->factor     = A->factor;
1252   c->row        = 0;
1253   c->col        = 0;
1254   C->assembled  = PETSC_TRUE;
1255 
1256   c->m = C->m   = a->m;
1257   c->n = C->n   = a->n;
1258   C->M          = a->m;
1259   C->N          = a->n;
1260 
1261   c->bs         = a->bs;
1262   c->mbs        = a->mbs;
1263   c->nbs        = a->nbs;
1264   c->indexshift = 0;
1265 
1266   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1267   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1268   for ( i=0; i<mbs; i++ ) {
1269     c->imax[i] = a->imax[i];
1270     c->ilen[i] = a->ilen[i];
1271   }
1272 
1273   /* allocate the matrix space */
1274   c->singlemalloc = 1;
1275   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1276   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1277   c->j  = (int *) (c->a + nz*bs2);
1278   c->i  = c->j + nz;
1279   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1280   if (mbs > 0) {
1281     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1282     if (cpvalues == COPY_VALUES) {
1283       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1284     }
1285   }
1286 
1287   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1288   c->sorted      = a->sorted;
1289   c->roworiented = a->roworiented;
1290   c->nonew       = a->nonew;
1291 
1292   if (a->diag) {
1293     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1294     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1295     for ( i=0; i<mbs; i++ ) {
1296       c->diag[i] = a->diag[i];
1297     }
1298   }
1299   else c->diag          = 0;
1300   c->nz                 = a->nz;
1301   c->maxnz              = a->maxnz;
1302   c->solve_work         = 0;
1303   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1304   c->mult_work          = 0;
1305   *B = C;
1306   return 0;
1307 }
1308 
1309 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1310 {
1311   Mat_SeqBAIJ  *a;
1312   Mat          B;
1313   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1314   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1315   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1316   int          *masked, nmask,tmp,ishift,bs2;
1317   Scalar       *aa;
1318   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1319 
1320   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1321   bs2  = bs*bs;
1322 
1323   MPI_Comm_size(comm,&size);
1324   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1325   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1326   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1327   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1328   M = header[1]; N = header[2]; nz = header[3];
1329 
1330   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1331 
1332   /*
1333      This code adds extra rows to make sure the number of rows is
1334     divisible by the blocksize
1335   */
1336   mbs        = M/bs;
1337   extra_rows = bs - M + bs*(mbs);
1338   if (extra_rows == bs) extra_rows = 0;
1339   else                  mbs++;
1340   if (extra_rows) {
1341     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
1342   }
1343 
1344   /* read in row lengths */
1345   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1346   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1347   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1348 
1349   /* read in column indices */
1350   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1351   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1352   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1353 
1354   /* loop over row lengths determining block row lengths */
1355   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1356   PetscMemzero(browlengths,mbs*sizeof(int));
1357   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1358   PetscMemzero(mask,mbs*sizeof(int));
1359   masked = mask + mbs;
1360   rowcount = 0; nzcount = 0;
1361   for ( i=0; i<mbs; i++ ) {
1362     nmask = 0;
1363     for ( j=0; j<bs; j++ ) {
1364       kmax = rowlengths[rowcount];
1365       for ( k=0; k<kmax; k++ ) {
1366         tmp = jj[nzcount++]/bs;
1367         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1368       }
1369       rowcount++;
1370     }
1371     browlengths[i] += nmask;
1372     /* zero out the mask elements we set */
1373     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1374   }
1375 
1376   /* create our matrix */
1377   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1378          CHKERRQ(ierr);
1379   B = *A;
1380   a = (Mat_SeqBAIJ *) B->data;
1381 
1382   /* set matrix "i" values */
1383   a->i[0] = 0;
1384   for ( i=1; i<= mbs; i++ ) {
1385     a->i[i]      = a->i[i-1] + browlengths[i-1];
1386     a->ilen[i-1] = browlengths[i-1];
1387   }
1388   a->indexshift = 0;
1389   a->nz         = 0;
1390   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1391 
1392   /* read in nonzero values */
1393   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1394   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
1395   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1396 
1397   /* set "a" and "j" values into matrix */
1398   nzcount = 0; jcount = 0;
1399   for ( i=0; i<mbs; i++ ) {
1400     nzcountb = nzcount;
1401     nmask    = 0;
1402     for ( j=0; j<bs; j++ ) {
1403       kmax = rowlengths[i*bs+j];
1404       for ( k=0; k<kmax; k++ ) {
1405         tmp = jj[nzcount++]/bs;
1406 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1407       }
1408       rowcount++;
1409     }
1410     /* sort the masked values */
1411     PetscSortInt(nmask,masked);
1412 
1413     /* set "j" values into matrix */
1414     maskcount = 1;
1415     for ( j=0; j<nmask; j++ ) {
1416       a->j[jcount++]  = masked[j];
1417       mask[masked[j]] = maskcount++;
1418     }
1419     /* set "a" values into matrix */
1420     ishift = bs2*a->i[i];
1421     for ( j=0; j<bs; j++ ) {
1422       kmax = rowlengths[i*bs+j];
1423       for ( k=0; k<kmax; k++ ) {
1424         tmp    = jj[nzcountb]/bs ;
1425         block  = mask[tmp] - 1;
1426         point  = jj[nzcountb] - bs*tmp;
1427         idx    = ishift + bs2*block + j + bs*point;
1428         a->a[idx] = aa[nzcountb++];
1429       }
1430     }
1431     /* zero out the mask elements we set */
1432     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1433   }
1434   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1435 
1436   PetscFree(rowlengths);
1437   PetscFree(browlengths);
1438   PetscFree(aa);
1439   PetscFree(jj);
1440   PetscFree(mask);
1441 
1442   B->assembled = PETSC_TRUE;
1443 
1444   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1445   if (flg) {
1446     Viewer tviewer;
1447     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1448     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
1449     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1450     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1451   }
1452   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1453   if (flg) {
1454     Viewer tviewer;
1455     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1456     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
1457     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1458     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1459   }
1460   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1461   if (flg) {
1462     Viewer tviewer;
1463     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1464     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1465     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1466   }
1467   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1468   if (flg) {
1469     Viewer tviewer;
1470     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1471     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
1472     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1473     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1474   }
1475   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1476   if (flg) {
1477     Viewer tviewer;
1478     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
1479     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
1480     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
1481     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1482   }
1483   return 0;
1484 }
1485 
1486 
1487 
1488