xref: /petsc/src/mat/impls/baij/seq/baij.c (revision a7c10996e443b3a8ece69fc03ff391df87b1eb0e)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.34 1996/04/09 21:49:35 balay Exp balay $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 #define  max(a,b) (a>b ? a:b)
15 extern   int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = 0;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45     CHKERRQ(ierr);
46     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
47     PetscFree(ia); PetscFree(ja);
48   } else {
49     if (ishift == oshift) {
50       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51     }
52     else if (ishift == -1) {
53       /* temporarily subtract 1 from i and j indices */
54       int nz = a->i[a->n] - 1;
55       for ( i=0; i<nz; i++ ) a->j[i]--;
56       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58       for ( i=0; i<nz; i++ ) a->j[i]++;
59       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60     } else {
61       /* temporarily add 1 to i and j indices */
62       int nz = a->i[a->n] - 1;
63       for ( i=0; i<nz; i++ ) a->j[i]++;
64       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66       for ( i=0; i<nz; i++ ) a->j[i]--;
67       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68     }
69   }
70   return 0;
71 }
72 
73 /*
74      Adds diagonal pointers to sparse matrix structure.
75 */
76 
77 int MatMarkDiag_SeqBAIJ(Mat A)
78 {
79   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80   int         i,j, *diag, m = a->mbs;
81 
82   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83   PLogObjectMemory(A,(m+1)*sizeof(int));
84   for ( i=0; i<m; i++ ) {
85     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86       if (a->j[j] == i) {
87         diag[i] = j;
88         break;
89       }
90     }
91   }
92   a->diag = diag;
93   return 0;
94 }
95 
96 #include "draw.h"
97 #include "pinclude/pviewer.h"
98 #include "sys.h"
99 
100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
101 {
102   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
103   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=bs*bs;
104   Scalar      *aa;
105 
106   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
107   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
108   col_lens[0] = MAT_COOKIE;
109   col_lens[1] = a->m;
110   col_lens[2] = a->n;
111   col_lens[3] = a->nz*bs2;
112 
113   /* store lengths of each row and write (including header) to file */
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118     }
119   }
120   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
121   PetscFree(col_lens);
122 
123   /* store column indices (zero start index) */
124   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
125   count = 0;
126   for ( i=0; i<a->mbs; i++ ) {
127     for ( j=0; j<bs; j++ ) {
128       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129         for ( l=0; l<bs; l++ ) {
130           jj[count++] = bs*a->j[k] + l;
131         }
132       }
133     }
134   }
135   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
136   PetscFree(jj);
137 
138   /* store nonzero values */
139   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
140   count = 0;
141   for ( i=0; i<a->mbs; i++ ) {
142     for ( j=0; j<bs; j++ ) {
143       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144         for ( l=0; l<bs; l++ ) {
145           aa[count++] = a->a[bs2*k + l*bs + j];
146         }
147       }
148     }
149   }
150   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
151   PetscFree(aa);
152   return 0;
153 }
154 
155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
156 {
157   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
158   int         ierr, i,j,format,bs = a->bs,k,l,bs2=bs*bs;
159   FILE        *fd;
160   char        *outputname;
161 
162   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
163   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
164   ierr = ViewerGetFormat(viewer,&format);
165   if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) {
166     fprintf(fd,"  block size is %d\n",bs);
167   }
168   else if (format == ASCII_FORMAT_MATLAB) {
169     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
170   }
171   else if (format == ASCII_FORMAT_COMMON) {
172     for ( i=0; i<a->mbs; i++ ) {
173       for ( j=0; j<bs; j++ ) {
174         fprintf(fd,"row %d:",i*bs+j);
175         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
176           for ( l=0; l<bs; l++ ) {
177 #if defined(PETSC_COMPLEX)
178           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
179             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
180               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
181           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
182             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
183 #else
184           if (a->a[bs2*k + l*bs + j] != 0.0)
185             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
186 #endif
187           }
188         }
189         fprintf(fd,"\n");
190       }
191     }
192   }
193   else {
194     for ( i=0; i<a->mbs; i++ ) {
195       for ( j=0; j<bs; j++ ) {
196         fprintf(fd,"row %d:",i*bs+j);
197         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
198           for ( l=0; l<bs; l++ ) {
199 #if defined(PETSC_COMPLEX)
200           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
201             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
202               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
203           }
204           else {
205             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
206           }
207 #else
208             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
209 #endif
210           }
211         }
212         fprintf(fd,"\n");
213       }
214     }
215   }
216   fflush(fd);
217   return 0;
218 }
219 
220 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
221 {
222   Mat         A = (Mat) obj;
223   ViewerType  vtype;
224   int         ierr;
225 
226   if (!viewer) {
227     viewer = STDOUT_VIEWER_SELF;
228   }
229 
230   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
231   if (vtype == MATLAB_VIEWER) {
232     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
233   }
234   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
235     return MatView_SeqBAIJ_ASCII(A,viewer);
236   }
237   else if (vtype == BINARY_FILE_VIEWER) {
238     return MatView_SeqBAIJ_Binary(A,viewer);
239   }
240   else if (vtype == DRAW_VIEWER) {
241     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
242   }
243   return 0;
244 }
245 
246 #define CHUNKSIZE  10
247 
248 /* This version has row oriented v  */
249 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
250 {
251   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
252   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
253   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
254   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
255   int          ridx,cidx,bs2=bs*bs;
256   Scalar      *ap,value,*aa=a->a,*bap;
257 
258   for ( k=0; k<m; k++ ) { /* loop over added rows */
259     row  = im[k]; brow = row/bs;
260     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
261     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
262     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
263     rmax = imax[brow]; nrow = ailen[brow];
264     low = 0;
265     for ( l=0; l<n; l++ ) { /* loop over added columns */
266       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
267       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
268       col = in[l]; bcol = col/bs;
269       ridx = row % bs; cidx = col % bs;
270       if (roworiented) {
271         value = *v++;
272       }
273       else {
274         value = v[k + l*m];
275       }
276       if (!sorted) low = 0; high = nrow;
277       while (high-low > 5) {
278         t = (low+high)/2;
279         if (rp[t] > bcol) high = t;
280         else             low  = t;
281       }
282       for ( i=low; i<high; i++ ) {
283         if (rp[i] > bcol) break;
284         if (rp[i] == bcol) {
285           bap  = ap +  bs2*i + bs*cidx + ridx;
286           if (is == ADD_VALUES) *bap += value;
287           else                  *bap  = value;
288           goto noinsert;
289         }
290       }
291       if (nonew) goto noinsert;
292       if (nrow >= rmax) {
293         /* there is no extra room in row, therefore enlarge */
294         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
295         Scalar *new_a;
296 
297         /* malloc new storage space */
298         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
299         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
300         new_j   = (int *) (new_a + bs2*new_nz);
301         new_i   = new_j + new_nz;
302 
303         /* copy over old data into new slots */
304         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
305         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
306         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
307         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
308         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
309                                                            len*sizeof(int));
310         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
311         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
312         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
313                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
314         /* free up old matrix storage */
315         PetscFree(a->a);
316         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
317         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
318         a->singlemalloc = 1;
319 
320         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
321         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
322         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
323         a->maxnz += CHUNKSIZE;
324         a->reallocs++;
325         a->nz++;
326       }
327       N = nrow++ - 1;
328       /* shift up all the later entries in this row */
329       for ( ii=N; ii>=i; ii-- ) {
330         rp[ii+1] = rp[ii];
331          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
332       }
333       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
334       rp[i] = bcol;
335       ap[bs2*i + bs*cidx + ridx] = value;
336       noinsert:;
337       low = i;
338     }
339     ailen[brow] = nrow;
340   }
341   return 0;
342 }
343 
344 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
345 {
346   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
347   *m = a->m; *n = a->n;
348   return 0;
349 }
350 
351 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
352 {
353   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
354   *m = 0; *n = a->m;
355   return 0;
356 }
357 
358 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
359 {
360   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
361   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
362   Scalar      *aa,*v_i,*aa_i;
363 
364   bs  = a->bs;
365   ai  = a->i;
366   aj  = a->j;
367   aa  = a->a;
368   bs2 = bs*bs;
369 
370   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
371 
372   bn  = row/bs;   /* Block number */
373   bp  = row % bs; /* Block Position */
374   M   = ai[bn+1] - ai[bn];
375   *nz = bs*M;
376 
377   if (v) {
378     *v = 0;
379     if (*nz) {
380       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
381       for ( i=0; i<M; i++ ) { /* for each block in the block row */
382         v_i  = *v + i*bs;
383         aa_i = aa + bs2*(ai[bn] + i);
384         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
385       }
386     }
387   }
388 
389   if (idx) {
390     *idx = 0;
391     if (*nz) {
392       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
393       for ( i=0; i<M; i++ ) { /* for each block in the block row */
394         idx_i = *idx + i*bs;
395         itmp  = bs*aj[ai[bn] + i];
396         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
397       }
398     }
399   }
400   return 0;
401 }
402 
403 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
404 {
405   if (idx) {if (*idx) PetscFree(*idx);}
406   if (v)   {if (*v)   PetscFree(*v);}
407   return 0;
408 }
409 
410 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
411 {
412   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
413   Mat         C;
414   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
415   int         *rows,*cols,bs2=bs*bs;
416   Scalar      *array=a->a;
417 
418   if (B==PETSC_NULL && mbs!=nbs)
419     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
420   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
421   PetscMemzero(col,(1+nbs)*sizeof(int));
422 
423   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
424   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
425   PetscFree(col);
426   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
427   cols = rows + bs;
428   for ( i=0; i<mbs; i++ ) {
429     cols[0] = i*bs;
430     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
431     len = ai[i+1] - ai[i];
432     for ( j=0; j<len; j++ ) {
433       rows[0] = (*aj++)*bs;
434       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
435       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
436       array += bs2;
437     }
438   }
439   PetscFree(rows);
440 
441   ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
442   ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
443 
444   if (B != PETSC_NULL) {
445     *B = C;
446   } else {
447     /* This isn't really an in-place transpose */
448     PetscFree(a->a);
449     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
450     if (a->diag) PetscFree(a->diag);
451     if (a->ilen) PetscFree(a->ilen);
452     if (a->imax) PetscFree(a->imax);
453     if (a->solve_work) PetscFree(a->solve_work);
454     PetscFree(a);
455     PetscMemcpy(A,C,sizeof(struct _Mat));
456     PetscHeaderDestroy(C);
457   }
458   return 0;
459 }
460 
461 
462 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
463 {
464   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
465   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
466   int        m = a->m,*ip, N, *ailen = a->ilen;
467   int        bs = a->bs,  mbs = a->mbs, bs2 = bs*bs;
468   Scalar     *aa = a->a, *ap;
469 
470   if (mode == FLUSH_ASSEMBLY) return 0;
471 
472   for ( i=1; i<mbs; i++ ) {
473     /* move each row back by the amount of empty slots (fshift) before it*/
474     fshift += imax[i-1] - ailen[i-1];
475     if (fshift) {
476       ip = aj + ai[i]; ap = aa + bs2*ai[i];
477       N = ailen[i];
478       for ( j=0; j<N; j++ ) {
479         ip[j-fshift] = ip[j];
480         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
481       }
482     }
483     ai[i] = ai[i-1] + ailen[i-1];
484   }
485   if (mbs) {
486     fshift += imax[mbs-1] - ailen[mbs-1];
487     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
488   }
489   /* reset ilen and imax for each row */
490   for ( i=0; i<mbs; i++ ) {
491     ailen[i] = imax[i] = ai[i+1] - ai[i];
492   }
493   a->nz = ai[mbs];
494 
495   /* diagonals may have moved, so kill the diagonal pointers */
496   if (fshift && a->diag) {
497     PetscFree(a->diag);
498     PLogObjectMemory(A,-(m+1)*sizeof(int));
499     a->diag = 0;
500   }
501   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs);
502   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n",
503            a->reallocs);
504   return 0;
505 }
506 
507 static int MatZeroEntries_SeqBAIJ(Mat A)
508 {
509   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
510   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
511   return 0;
512 }
513 
514 int MatDestroy_SeqBAIJ(PetscObject obj)
515 {
516   Mat         A  = (Mat) obj;
517   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
518 
519 #if defined(PETSC_LOG)
520   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
521 #endif
522   PetscFree(a->a);
523   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
524   if (a->diag) PetscFree(a->diag);
525   if (a->ilen) PetscFree(a->ilen);
526   if (a->imax) PetscFree(a->imax);
527   if (a->solve_work) PetscFree(a->solve_work);
528   if (a->mult_work) PetscFree(a->mult_work);
529   PetscFree(a);
530   PLogObjectDestroy(A);
531   PetscHeaderDestroy(A);
532   return 0;
533 }
534 
535 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
536 {
537   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
538   if      (op == ROW_ORIENTED)              a->roworiented = 1;
539   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
540   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
541   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
542   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
543   else if (op == ROWS_SORTED ||
544            op == SYMMETRIC_MATRIX ||
545            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
546            op == YES_NEW_DIAGONALS)
547     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
548   else if (op == NO_NEW_DIAGONALS)
549     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
550   else
551     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
552   return 0;
553 }
554 
555 
556 /* -------------------------------------------------------*/
557 /* Should check that shapes of vectors and matrices match */
558 /* -------------------------------------------------------*/
559 #include "pinclude/plapack.h"
560 
561 static int MatMultAdd_SeqBAIJ_Private(Mat A,Vec xx,Vec yy,Vec zz)
562 {
563   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
564   Scalar          *xg,*y,*zg;
565   register Scalar *x,*z,*v,sum,*xb,sum1,sum2,sum3,sum4,sum5;
566   register Scalar x1,x2,x3,x4,x5;
567   int             mbs=a->mbs,m=a->m,i,*idx,*ii;
568   int             bs=a->bs,j,n,bs2=bs*bs,ierr;
569 
570   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
571   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
572 
573   if (yy==PETSC_NULL) PetscMemzero(z,m*sizeof(Scalar)); /* MatMult() */
574   else if (zz!=yy){
575     ierr = VecGetArray(yy,&y); CHKERRQ(ierr);
576     PetscMemcpy(z,y,m*sizeof(Scalar));
577     ierr = VecRestoreArray(yy,&y); CHKERRQ(ierr);
578   }
579 
580   idx   = a->j;
581   v     = a->a;
582   ii    = a->i;
583 
584   switch (bs) {
585   case 1:
586     for ( i=0; i<mbs; i++ ) {
587       n    = ii[1] - ii[0]; ii++;
588       sum  = 0.0;
589       while (n--) sum += *v++ * x[*idx++];
590       z[i] += sum;
591     }
592     break;
593   case 2:
594     for ( i=0; i<mbs; i++ ) {
595       n  = ii[1] - ii[0]; ii++;
596       sum1 = 0.0; sum2 = 0.0;
597       for ( j=0; j<n; j++ ) {
598         xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
599         sum1 += v[0]*x1 + v[2]*x2;
600         sum2 += v[1]*x1 + v[3]*x2;
601         v += 4;
602       }
603       z[0] += sum1; z[1] += sum2;
604       z += 2;
605     }
606     break;
607   case 3:
608     for ( i=0; i<mbs; i++ ) {
609       n  = ii[1] - ii[0]; ii++;
610       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
611       for ( j=0; j<n; j++ ) {
612         xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
613         sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
614         sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
615         sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
616         v += 9;
617       }
618       z[0] += sum1; z[1] += sum2; z[2] += sum3;
619       z += 3;
620     }
621     break;
622   case 4:
623     for ( i=0; i<mbs; i++ ) {
624       n  = ii[1] - ii[0]; ii++;
625       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
626       for ( j=0; j<n; j++ ) {
627         xb = x + 4*(*idx++);
628         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
629         sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
630         sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
631         sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
632         sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
633         v += 16;
634       }
635       z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4;
636       z += 4;
637     }
638     break;
639   case 5:
640     for ( i=0; i<mbs; i++ ) {
641       n  = ii[1] - ii[0]; ii++;
642       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
643       for ( j=0; j<n; j++ ) {
644         xb = x + 5*(*idx++);
645         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
646         sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
647         sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
648         sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
649         sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
650         sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
651         v += 25;
652       }
653       z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4; z[4] += sum5;
654       z += 5;
655     }
656     break;
657     /* block sizes larger then 5 by 5 are handled by BLAS */
658   default: {
659       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
660       if (!a->mult_work) {
661         k = max(a->m,a->n);
662         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
663         CHKPTRQ(a->mult_work);
664       }
665       work = a->mult_work;
666       for ( i=0; i<mbs; i++ ) {
667         n     = ii[1] - ii[0]; ii++;
668         ncols = n*bs;
669         workt = work;
670         for ( j=0; j<n; j++ ) {
671           xb = x + bs*(*idx++);
672           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
673           workt += bs;
674         }
675         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
676         v += n*bs2;
677         z += bs;
678       }
679     }
680   }
681   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
682   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
683   return 0;
684 }
685 
686 static int MatMult_SeqBAIJ(Mat A,Vec xx, Vec yy)
687 {
688   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
689   int         ierr,bs=a->bs,bs2=bs*bs;
690 
691   ierr = MatMultAdd_SeqBAIJ_Private(A,xx,PETSC_NULL,yy); CHKERRQ(ierr);
692   PLogFlops(2*bs2*(a->nz)-a->m);
693   return 0;
694 }
695 
696 static int MatMultAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
697 {
698   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
699   int         ierr,bs=a->bs,bs2=bs*bs;
700 
701   ierr = MatMultAdd_SeqBAIJ_Private(A,xx,yy,zz); CHKERRQ(ierr);
702   PLogFlops(2*bs2*(a->nz));
703   return 0;
704 }
705 
706 static int MatMultTransAdd_SeqBAIJ_Private(Mat A,Vec xx,Vec yy,Vec zz)
707 {
708   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
709   Scalar          *xg,*y,*zg,*zb;
710   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
711   int             mbs=a->mbs,N=a->n,i,*idx,*ii,*ai=a->i,rval;
712   int             bs=a->bs,j,n,bs2=bs*bs,*ib,ierr;
713 
714 
715   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
716   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
717 
718   if (yy==PETSC_NULL) PetscMemzero(z,N*sizeof(Scalar)); /* MatMultTrans() */
719   else if (zz!=yy){
720     ierr = VecGetArray(yy,&y); CHKERRQ(ierr);
721     PetscMemcpy(z,y,N*sizeof(Scalar));
722     ierr = VecRestoreArray(yy,&y); CHKERRQ(ierr);
723   }
724 
725   idx   = a->j;
726   v     = a->a;
727   ii    = a->i;
728 
729   switch (bs) {
730   case 1:
731     for ( i=0; i<mbs; i++ ) {
732       n  = ii[1] - ii[0]; ii++;
733       xb = x + i; x1 = xb[0];
734       ib = idx + ai[i];
735       for ( j=0; j<n; j++ ) {
736         z[ib[j]] += *v++ * x1;
737       }
738     }
739     break;
740   case 2:
741     for ( i=0; i<mbs; i++ ) {
742       n  = ii[1] - ii[0]; ii++;
743       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
744       ib = idx + ai[i];
745       for ( j=0; j<n; j++ ) {
746         rval = ib[j]*2;
747         z[rval++] += v[0]*x1 + v[1]*x2;
748         z[rval++] += v[2]*x1 + v[3]*x2;
749         v += 4;
750       }
751     }
752     break;
753   case 3:
754     for ( i=0; i<mbs; i++ ) {
755       n  = ii[1] - ii[0]; ii++;
756       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
757       ib = idx + ai[i];
758       for ( j=0; j<n; j++ ) {
759         rval = ib[j]*3;
760         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
761         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
762         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
763         v += 9;
764       }
765     }
766     break;
767   case 4:
768     for ( i=0; i<mbs; i++ ) {
769       n  = ii[1] - ii[0]; ii++;
770       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
771       ib = idx + ai[i];
772       for ( j=0; j<n; j++ ) {
773         rval = ib[j]*4;
774         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
775         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
776         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
777         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
778         v += 16;
779       }
780     }
781     break;
782   case 5:
783     for ( i=0; i<mbs; i++ ) {
784       n  = ii[1] - ii[0]; ii++;
785       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
786       x4 = xb[3];   x5 = xb[4];
787       ib = idx + ai[i];
788       for ( j=0; j<n; j++ ) {
789         rval = ib[j]*5;
790         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
791         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
792         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
793         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
794         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
795         v += 25;
796       }
797     }
798     break;
799       /* block sizes larger then 5 by 5 are handled by BLAS */
800     default: {
801       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
802       if (!a->mult_work) {
803         k = max(a->m,a->n);
804         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
805         CHKPTRQ(a->mult_work);
806       }
807       work = a->mult_work;
808       for ( i=0; i<mbs; i++ ) {
809         n     = ii[1] - ii[0]; ii++;
810         ncols = n*bs;
811         PetscMemzero(work,ncols*sizeof(Scalar));
812         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
813         v += n*bs2;
814         x += bs;
815         workt = work;
816         for ( j=0; j<n; j++ ) {
817           zb = z + bs*(*idx++);
818           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
819           workt += bs;
820         }
821       }
822     }
823   }
824   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
825   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
826   return 0;
827 }
828 
829 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx, Vec yy)
830 {
831   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
832   int         ierr,bs=a->bs,bs2=bs*bs;
833 
834   ierr = MatMultTransAdd_SeqBAIJ_Private(A,xx,PETSC_NULL,yy); CHKERRQ(ierr);
835   PLogFlops(2*bs2*(a->nz)-a->n);
836   return 0;
837 }
838 
839 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
840 {
841   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
842   int         ierr,bs=a->bs,bs2=bs*bs;
843 
844   ierr = MatMultTransAdd_SeqBAIJ_Private(A,xx,yy,zz); CHKERRQ(ierr);
845   PLogFlops(2*bs2*(a->nz));
846   return 0;
847 }
848 
849 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
850 {
851   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
852   if (nz)  *nz  = a->bs*a->bs*a->nz;
853   if (nza) *nza = a->maxnz;
854   if (mem) *mem = (int)A->mem;
855   return 0;
856 }
857 
858 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
859 {
860   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
861 
862   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
863 
864   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
865   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
866       (a->nz != b->nz)) {
867     *flg = PETSC_FALSE; return 0;
868   }
869 
870   /* if the a->i are the same */
871   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
872     *flg = PETSC_FALSE; return 0;
873   }
874 
875   /* if a->j are the same */
876   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
877     *flg = PETSC_FALSE; return 0;
878   }
879 
880   /* if a->a are the same */
881   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
882     *flg = PETSC_FALSE; return 0;
883   }
884   *flg = PETSC_TRUE;
885   return 0;
886 
887 }
888 
889 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
890 {
891   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
892   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
893   Scalar      *x, zero = 0.0,*aa,*aa_j;
894 
895   bs   = a->bs;
896   aa   = a->a;
897   ai   = a->i;
898   aj   = a->j;
899   ambs = a->mbs;
900   bs2  = bs*bs;
901 
902   VecSet(&zero,v);
903   VecGetArray(v,&x); VecGetLocalSize(v,&n);
904   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
905   for ( i=0; i<ambs; i++ ) {
906     for ( j=ai[i]; j<ai[i+1]; j++ ) {
907       if (aj[j] == i) {
908         row  = i*bs;
909         aa_j = aa+j*bs2;
910         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
911         break;
912       }
913     }
914   }
915   return 0;
916 }
917 
918 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
919 {
920   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
921   Scalar      *l,*r,x,*v,*aa,*li,*ri;
922   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
923 
924   ai  = a->i;
925   aj  = a->j;
926   aa  = a->a;
927   m   = a->m;
928   n   = a->n;
929   bs  = a->bs;
930   mbs = a->mbs;
931   bs2 = bs*bs;
932   if (ll) {
933     VecGetArray(ll,&l); VecGetSize(ll,&lm);
934     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
935     for ( i=0; i<mbs; i++ ) { /* for each block row */
936       M  = ai[i+1] - ai[i];
937       li = l + i*bs;
938       v  = aa + bs2*ai[i];
939       for ( j=0; j<M; j++ ) { /* for each block */
940         for ( k=0; k<bs2; k++ ) {
941           (*v++) *= li[k%bs];
942         }
943       }
944     }
945   }
946 
947   if (rr) {
948     VecGetArray(rr,&r); VecGetSize(rr,&rn);
949     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
950     for ( i=0; i<mbs; i++ ) { /* for each block row */
951       M  = ai[i+1] - ai[i];
952       v  = aa + bs2*ai[i];
953       for ( j=0; j<M; j++ ) { /* for each block */
954         ri = r + bs*aj[ai[i]+j];
955         for ( k=0; k<bs; k++ ) {
956           x = ri[k];
957           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
958         }
959       }
960     }
961   }
962   return 0;
963 }
964 
965 
966 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
967 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
968 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
969 extern int MatSolveAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
970 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
971 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
972 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
973 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
974 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
975 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
976 
977 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
978 {
979   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
980   Scalar      *v = a->a;
981   double      sum = 0.0;
982   int         i,bs=a->bs,nz=a->nz,bs2=bs*bs;
983 
984   if (type == NORM_FROBENIUS) {
985     for (i=0; i< bs2*nz; i++ ) {
986 #if defined(PETSC_COMPLEX)
987       sum += real(conj(*v)*(*v)); v++;
988 #else
989       sum += (*v)*(*v); v++;
990 #endif
991     }
992     *norm = sqrt(sum);
993   }
994   else {
995     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
996   }
997   return 0;
998 }
999 
1000 /*
1001      note: This can only work for identity for row and col. It would
1002    be good to check this and otherwise generate an error.
1003 */
1004 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1005 {
1006   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1007   Mat         outA;
1008   int         ierr;
1009 
1010   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
1011 
1012   outA          = inA;
1013   inA->factor   = FACTOR_LU;
1014   a->row        = row;
1015   a->col        = col;
1016 
1017   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1018 
1019   if (!a->diag) {
1020     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1021   }
1022   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1023   return 0;
1024 }
1025 
1026 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1027 {
1028   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1029   int         one = 1, totalnz = a->bs*a->bs*a->nz;
1030   BLscal_( &totalnz, alpha, a->a, &one );
1031   PLogFlops(totalnz);
1032   return 0;
1033 }
1034 
1035 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1036 {
1037   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1038   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1039   int        *ai = a->i, *ailen = a->ilen;
1040   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=bs*bs;
1041   Scalar     *ap, *aa = a->a, zero = 0.0;
1042 
1043   for ( k=0; k<m; k++ ) { /* loop over rows */
1044     row  = im[k]; brow = row/bs;
1045     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
1046     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1047     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1048     nrow = ailen[brow];
1049     for ( l=0; l<n; l++ ) { /* loop over columns */
1050       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1051       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1052       col  = in[l] ;
1053       bcol = col/bs;
1054       cidx = col%bs;
1055       ridx = row%bs;
1056       high = nrow;
1057       low  = 0; /* assume unsorted */
1058       while (high-low > 5) {
1059         t = (low+high)/2;
1060         if (rp[t] > bcol) high = t;
1061         else             low  = t;
1062       }
1063       for ( i=low; i<high; i++ ) {
1064         if (rp[i] > bcol) break;
1065         if (rp[i] == bcol) {
1066           *v++ = ap[bs2*i+bs*cidx+ridx];
1067           goto finished;
1068         }
1069       }
1070       *v++ = zero;
1071       finished:;
1072     }
1073   }
1074   return 0;
1075 }
1076 
1077 int MatPrintHelp_SeqBAIJ(Mat A)
1078 {
1079   static int called = 0;
1080 
1081   if (called) return 0; else called = 1;
1082   return 0;
1083 }
1084 /* -------------------------------------------------------------------*/
1085 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1086        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1087        MatMult_SeqBAIJ,MatMultAdd_SeqBAIJ,
1088        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1089        MatSolve_SeqBAIJ,MatSolveAdd_SeqBAIJ,
1090        0,0,
1091        MatLUFactor_SeqBAIJ,0,
1092        0,
1093        MatTranspose_SeqBAIJ,
1094        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1095        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1096        0,MatAssemblyEnd_SeqBAIJ,
1097        0,
1098        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
1099        MatGetReordering_SeqBAIJ,
1100        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1101        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1102        MatILUFactorSymbolic_SeqBAIJ,0,
1103        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1104        0,0,
1105        MatConvertSameType_SeqBAIJ,0,0,
1106        MatILUFactor_SeqBAIJ,0,0,
1107        0,0,
1108        MatGetValues_SeqBAIJ,0,
1109        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1110        0};
1111 
1112 /*@C
1113    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1114    compressed row) format.  For good matrix assembly performance the
1115    user should preallocate the matrix storage by setting the parameter nz
1116    (or nzz).  By setting these parameters accurately, performance can be
1117    increased by more than a factor of 50.
1118 
1119    Input Parameters:
1120 .  comm - MPI communicator, set to MPI_COMM_SELF
1121 .  bs - size of block
1122 .  m - number of rows
1123 .  n - number of columns
1124 .  nz - number of block nonzeros per block row (same for all rows)
1125 .  nzz - number of block nonzeros per block row or PETSC_NULL
1126          (possibly different for each row)
1127 
1128    Output Parameter:
1129 .  A - the matrix
1130 
1131    Notes:
1132    The block AIJ format is fully compatible with standard Fortran 77
1133    storage.  That is, the stored row and column indices can begin at
1134    either one (as in Fortran) or zero.  See the users' manual for details.
1135 
1136    Specify the preallocated storage with either nz or nnz (not both).
1137    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1138    allocation.  For additional details, see the users manual chapter on
1139    matrices and the file $(PETSC_DIR)/Performance.
1140 
1141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1142 @*/
1143 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1144 {
1145   Mat         B;
1146   Mat_SeqBAIJ *b;
1147   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1148 
1149   if (mbs*bs!=m || nbs*bs!=n)
1150     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1151 
1152   *A = 0;
1153   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1154   PLogObjectCreate(B);
1155   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1156   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1157   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1158   switch (bs) {
1159     case 1:
1160       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1161       break;
1162     case 2:
1163       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1164       break;
1165     case 3:
1166       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1167       break;
1168     case 4:
1169       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1170       break;
1171     case 5:
1172       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1173       break;
1174   }
1175   B->destroy          = MatDestroy_SeqBAIJ;
1176   B->view             = MatView_SeqBAIJ;
1177   B->factor           = 0;
1178   B->lupivotthreshold = 1.0;
1179   b->row              = 0;
1180   b->col              = 0;
1181   b->reallocs         = 0;
1182 
1183   b->m       = m; B->m = m; B->M = m;
1184   b->n       = n; B->n = n; B->N = n;
1185   b->mbs     = mbs;
1186   b->nbs     = nbs;
1187   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1188   if (nnz == PETSC_NULL) {
1189     if (nz == PETSC_DEFAULT) nz = 5;
1190     else if (nz <= 0)        nz = 1;
1191     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1192     nz = nz*mbs;
1193   }
1194   else {
1195     nz = 0;
1196     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1197   }
1198 
1199   /* allocate the matrix space */
1200   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1201   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1202   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1203   b->j  = (int *) (b->a + nz*bs2);
1204   PetscMemzero(b->j,nz*sizeof(int));
1205   b->i  = b->j + nz;
1206   b->singlemalloc = 1;
1207 
1208   b->i[0] = 0;
1209   for (i=1; i<mbs+1; i++) {
1210     b->i[i] = b->i[i-1] + b->imax[i-1];
1211   }
1212 
1213   /* b->ilen will count nonzeros in each block row so far. */
1214   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1215   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1216   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1217 
1218   b->bs               = bs;
1219   b->mbs              = mbs;
1220   b->nz               = 0;
1221   b->maxnz            = nz;
1222   b->sorted           = 0;
1223   b->roworiented      = 1;
1224   b->nonew            = 0;
1225   b->diag             = 0;
1226   b->solve_work       = 0;
1227   b->mult_work        = 0;
1228   b->spptr            = 0;
1229   *A = B;
1230   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1231   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1232   return 0;
1233 }
1234 
1235 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1236 {
1237   Mat         C;
1238   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1239   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz,bs2 = bs*bs;
1240 
1241   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1242 
1243   *B = 0;
1244   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1245   PLogObjectCreate(C);
1246   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1247   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1248   C->destroy    = MatDestroy_SeqBAIJ;
1249   C->view       = MatView_SeqBAIJ;
1250   C->factor     = A->factor;
1251   c->row        = 0;
1252   c->col        = 0;
1253   C->assembled  = PETSC_TRUE;
1254 
1255   c->m = C->m   = a->m;
1256   c->n = C->n   = a->n;
1257   C->M          = a->m;
1258   C->N          = a->n;
1259 
1260   c->bs         = a->bs;
1261   c->mbs        = a->mbs;
1262   c->nbs        = a->nbs;
1263 
1264   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1265   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1266   for ( i=0; i<mbs; i++ ) {
1267     c->imax[i] = a->imax[i];
1268     c->ilen[i] = a->ilen[i];
1269   }
1270 
1271   /* allocate the matrix space */
1272   c->singlemalloc = 1;
1273   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1274   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1275   c->j  = (int *) (c->a + nz*bs2);
1276   c->i  = c->j + nz;
1277   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1278   if (mbs > 0) {
1279     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1280     if (cpvalues == COPY_VALUES) {
1281       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1282     }
1283   }
1284 
1285   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1286   c->sorted      = a->sorted;
1287   c->roworiented = a->roworiented;
1288   c->nonew       = a->nonew;
1289 
1290   if (a->diag) {
1291     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1292     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1293     for ( i=0; i<mbs; i++ ) {
1294       c->diag[i] = a->diag[i];
1295     }
1296   }
1297   else c->diag          = 0;
1298   c->nz                 = a->nz;
1299   c->maxnz              = a->maxnz;
1300   c->solve_work         = 0;
1301   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1302   c->mult_work          = 0;
1303   *B = C;
1304   return 0;
1305 }
1306 
1307 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1308 {
1309   Mat_SeqBAIJ  *a;
1310   Mat          B;
1311   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1312   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1313   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1314   int          *masked, nmask,tmp,bs2,ishift;
1315   Scalar       *aa;
1316   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1317 
1318   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1319   bs2  = bs*bs;
1320 
1321   MPI_Comm_size(comm,&size);
1322   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1323   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1324   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1325   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1326   M = header[1]; N = header[2]; nz = header[3];
1327 
1328   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1329 
1330   /*
1331      This code adds extra rows to make sure the number of rows is
1332     divisible by the blocksize
1333   */
1334   mbs        = M/bs;
1335   extra_rows = bs - M + bs*(mbs);
1336   if (extra_rows == bs) extra_rows = 0;
1337   else                  mbs++;
1338   if (extra_rows) {
1339     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
1340   }
1341 
1342   /* read in row lengths */
1343   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1344   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1345   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1346 
1347   /* read in column indices */
1348   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1349   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1350   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1351 
1352   /* loop over row lengths determining block row lengths */
1353   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1354   PetscMemzero(browlengths,mbs*sizeof(int));
1355   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1356   PetscMemzero(mask,mbs*sizeof(int));
1357   masked = mask + mbs;
1358   rowcount = 0; nzcount = 0;
1359   for ( i=0; i<mbs; i++ ) {
1360     nmask = 0;
1361     for ( j=0; j<bs; j++ ) {
1362       kmax = rowlengths[rowcount];
1363       for ( k=0; k<kmax; k++ ) {
1364         tmp = jj[nzcount++]/bs;
1365         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1366       }
1367       rowcount++;
1368     }
1369     browlengths[i] += nmask;
1370     /* zero out the mask elements we set */
1371     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1372   }
1373 
1374   /* create our matrix */
1375   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1376          CHKERRQ(ierr);
1377   B = *A;
1378   a = (Mat_SeqBAIJ *) B->data;
1379 
1380   /* set matrix "i" values */
1381   a->i[0] = 0;
1382   for ( i=1; i<= mbs; i++ ) {
1383     a->i[i]      = a->i[i-1] + browlengths[i-1];
1384     a->ilen[i-1] = browlengths[i-1];
1385   }
1386   a->nz         = 0;
1387   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1388 
1389   /* read in nonzero values */
1390   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1391   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
1392   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1393 
1394   /* set "a" and "j" values into matrix */
1395   nzcount = 0; jcount = 0;
1396   for ( i=0; i<mbs; i++ ) {
1397     nzcountb = nzcount;
1398     nmask    = 0;
1399     for ( j=0; j<bs; j++ ) {
1400       kmax = rowlengths[i*bs+j];
1401       for ( k=0; k<kmax; k++ ) {
1402         tmp = jj[nzcount++]/bs;
1403 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1404       }
1405       rowcount++;
1406     }
1407     /* sort the masked values */
1408     PetscSortInt(nmask,masked);
1409 
1410     /* set "j" values into matrix */
1411     maskcount = 1;
1412     for ( j=0; j<nmask; j++ ) {
1413       a->j[jcount++]  = masked[j];
1414       mask[masked[j]] = maskcount++;
1415     }
1416     /* set "a" values into matrix */
1417     ishift = bs2*a->i[i];
1418     for ( j=0; j<bs; j++ ) {
1419       kmax = rowlengths[i*bs+j];
1420       for ( k=0; k<kmax; k++ ) {
1421         tmp    = jj[nzcountb]/bs ;
1422         block  = mask[tmp] - 1;
1423         point  = jj[nzcountb] - bs*tmp;
1424         idx    = ishift + bs2*block + j + bs*point;
1425         a->a[idx] = aa[nzcountb++];
1426       }
1427     }
1428     /* zero out the mask elements we set */
1429     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1430   }
1431   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1432 
1433   PetscFree(rowlengths);
1434   PetscFree(browlengths);
1435   PetscFree(aa);
1436   PetscFree(jj);
1437   PetscFree(mask);
1438 
1439   B->assembled = PETSC_TRUE;
1440 
1441   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1442   if (flg) {
1443     Viewer tviewer;
1444     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1445     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
1446     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1447     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1448   }
1449   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1450   if (flg) {
1451     Viewer tviewer;
1452     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1453     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
1454     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1455     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1456   }
1457   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1458   if (flg) {
1459     Viewer tviewer;
1460     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1461     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1462     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1463   }
1464   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1465   if (flg) {
1466     Viewer tviewer;
1467     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1468     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
1469     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1470     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1471   }
1472   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1473   if (flg) {
1474     Viewer tviewer;
1475     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
1476     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
1477     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
1478     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1479   }
1480   return 0;
1481 }
1482 
1483 
1484 
1485