xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 736121d41ed498147adfb28afb4752aac0794cb6)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.42 1996/04/30 19:19:34 balay Exp balay $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "src/vec/vecimpl.h"
12 #include "src/inline/spops.h"
13 #include "petsc.h"
14 
15 extern   int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = 0;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr);
45     ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
46     PetscFree(ia); PetscFree(ja);
47   } else {
48     if (ishift == oshift) {
49       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
50     }
51     else if (ishift == -1) {
52       /* temporarily subtract 1 from i and j indices */
53       int nz = a->i[n] - 1;
54       for ( i=0; i<nz; i++ ) a->j[i]--;
55       for ( i=0; i<n+1; i++ ) a->i[i]--;
56       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
57       for ( i=0; i<nz; i++ ) a->j[i]++;
58       for ( i=0; i<n+1; i++ ) a->i[i]++;
59     } else {
60       /* temporarily add 1 to i and j indices */
61       int nz = a->i[n] - 1;
62       for ( i=0; i<nz; i++ ) a->j[i]++;
63       for ( i=0; i<n+1; i++ ) a->i[i]++;
64       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
65       for ( i=0; i<nz; i++ ) a->j[i]--;
66       for ( i=0; i<n+1; i++ ) a->i[i]--;
67     }
68   }
69   return 0;
70 }
71 
72 /*
73      Adds diagonal pointers to sparse matrix structure.
74 */
75 
76 int MatMarkDiag_SeqBAIJ(Mat A)
77 {
78   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
79   int         i,j, *diag, m = a->mbs;
80 
81   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
82   PLogObjectMemory(A,(m+1)*sizeof(int));
83   for ( i=0; i<m; i++ ) {
84     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
85       if (a->j[j] == i) {
86         diag[i] = j;
87         break;
88       }
89     }
90   }
91   a->diag = diag;
92   return 0;
93 }
94 
95 #include "draw.h"
96 #include "pinclude/pviewer.h"
97 #include "sys.h"
98 
99 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
100 {
101   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
102   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
103   Scalar      *aa;
104 
105   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
106   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
107   col_lens[0] = MAT_COOKIE;
108   col_lens[1] = a->m;
109   col_lens[2] = a->n;
110   col_lens[3] = a->nz*bs2;
111 
112   /* store lengths of each row and write (including header) to file */
113   count = 0;
114   for ( i=0; i<a->mbs; i++ ) {
115     for ( j=0; j<bs; j++ ) {
116       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
117     }
118   }
119   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
120   PetscFree(col_lens);
121 
122   /* store column indices (zero start index) */
123   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
124   count = 0;
125   for ( i=0; i<a->mbs; i++ ) {
126     for ( j=0; j<bs; j++ ) {
127       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
128         for ( l=0; l<bs; l++ ) {
129           jj[count++] = bs*a->j[k] + l;
130         }
131       }
132     }
133   }
134   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
135   PetscFree(jj);
136 
137   /* store nonzero values */
138   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
139   count = 0;
140   for ( i=0; i<a->mbs; i++ ) {
141     for ( j=0; j<bs; j++ ) {
142       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
143         for ( l=0; l<bs; l++ ) {
144           aa[count++] = a->a[bs2*k + l*bs + j];
145         }
146       }
147     }
148   }
149   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
150   PetscFree(aa);
151   return 0;
152 }
153 
154 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
155 {
156   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
157   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
158   FILE        *fd;
159   char        *outputname;
160 
161   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
162   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
163   ierr = ViewerGetFormat(viewer,&format);
164   if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) {
165     fprintf(fd,"  block size is %d\n",bs);
166   }
167   else if (format == ASCII_FORMAT_MATLAB) {
168     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
169   }
170   else if (format == ASCII_FORMAT_COMMON) {
171     for ( i=0; i<a->mbs; i++ ) {
172       for ( j=0; j<bs; j++ ) {
173         fprintf(fd,"row %d:",i*bs+j);
174         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
175           for ( l=0; l<bs; l++ ) {
176 #if defined(PETSC_COMPLEX)
177           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
178             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
179               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
180           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
181             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
182 #else
183           if (a->a[bs2*k + l*bs + j] != 0.0)
184             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
185 #endif
186           }
187         }
188         fprintf(fd,"\n");
189       }
190     }
191   }
192   else {
193     for ( i=0; i<a->mbs; i++ ) {
194       for ( j=0; j<bs; j++ ) {
195         fprintf(fd,"row %d:",i*bs+j);
196         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
197           for ( l=0; l<bs; l++ ) {
198 #if defined(PETSC_COMPLEX)
199           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
200             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
201               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
202           }
203           else {
204             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
205           }
206 #else
207             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
208 #endif
209           }
210         }
211         fprintf(fd,"\n");
212       }
213     }
214   }
215   fflush(fd);
216   return 0;
217 }
218 
219 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
220 {
221   Mat         A = (Mat) obj;
222   ViewerType  vtype;
223   int         ierr;
224 
225   if (!viewer) {
226     viewer = STDOUT_VIEWER_SELF;
227   }
228 
229   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
230   if (vtype == MATLAB_VIEWER) {
231     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
232   }
233   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
234     return MatView_SeqBAIJ_ASCII(A,viewer);
235   }
236   else if (vtype == BINARY_FILE_VIEWER) {
237     return MatView_SeqBAIJ_Binary(A,viewer);
238   }
239   else if (vtype == DRAW_VIEWER) {
240     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
241   }
242   return 0;
243 }
244 
245 #define CHUNKSIZE  10
246 
247 /* This version has row oriented v  */
248 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
249 {
250   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
251   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
252   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
253   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
254   int          ridx,cidx,bs2=a->bs2;
255   Scalar      *ap,value,*aa=a->a,*bap;
256 
257   for ( k=0; k<m; k++ ) { /* loop over added rows */
258     row  = im[k]; brow = row/bs;
259     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
260     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
261     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
262     rmax = imax[brow]; nrow = ailen[brow];
263     low = 0;
264     for ( l=0; l<n; l++ ) { /* loop over added columns */
265       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
266       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
267       col = in[l]; bcol = col/bs;
268       ridx = row % bs; cidx = col % bs;
269       if (roworiented) {
270         value = *v++;
271       }
272       else {
273         value = v[k + l*m];
274       }
275       if (!sorted) low = 0; high = nrow;
276       while (high-low > 5) {
277         t = (low+high)/2;
278         if (rp[t] > bcol) high = t;
279         else             low  = t;
280       }
281       for ( i=low; i<high; i++ ) {
282         if (rp[i] > bcol) break;
283         if (rp[i] == bcol) {
284           bap  = ap +  bs2*i + bs*cidx + ridx;
285           if (is == ADD_VALUES) *bap += value;
286           else                  *bap  = value;
287           goto noinsert;
288         }
289       }
290       if (nonew) goto noinsert;
291       if (nrow >= rmax) {
292         /* there is no extra room in row, therefore enlarge */
293         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
294         Scalar *new_a;
295 
296         /* malloc new storage space */
297         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
298         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
299         new_j   = (int *) (new_a + bs2*new_nz);
300         new_i   = new_j + new_nz;
301 
302         /* copy over old data into new slots */
303         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
304         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
305         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
306         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
307         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
308                                                            len*sizeof(int));
309         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
310         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
311         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
312                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
313         /* free up old matrix storage */
314         PetscFree(a->a);
315         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
316         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
317         a->singlemalloc = 1;
318 
319         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
320         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
321         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
322         a->maxnz += CHUNKSIZE;
323         a->reallocs++;
324         a->nz++;
325       }
326       N = nrow++ - 1;
327       /* shift up all the later entries in this row */
328       for ( ii=N; ii>=i; ii-- ) {
329         rp[ii+1] = rp[ii];
330          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
331       }
332       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
333       rp[i] = bcol;
334       ap[bs2*i + bs*cidx + ridx] = value;
335       noinsert:;
336       low = i;
337     }
338     ailen[brow] = nrow;
339   }
340   return 0;
341 }
342 
343 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
344 {
345   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
346   *m = a->m; *n = a->n;
347   return 0;
348 }
349 
350 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
351 {
352   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
353   *m = 0; *n = a->m;
354   return 0;
355 }
356 
357 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
358 {
359   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
360   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
361   Scalar      *aa,*v_i,*aa_i;
362 
363   bs  = a->bs;
364   ai  = a->i;
365   aj  = a->j;
366   aa  = a->a;
367   bs2 = a->bs2;
368 
369   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
370 
371   bn  = row/bs;   /* Block number */
372   bp  = row % bs; /* Block Position */
373   M   = ai[bn+1] - ai[bn];
374   *nz = bs*M;
375 
376   if (v) {
377     *v = 0;
378     if (*nz) {
379       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
380       for ( i=0; i<M; i++ ) { /* for each block in the block row */
381         v_i  = *v + i*bs;
382         aa_i = aa + bs2*(ai[bn] + i);
383         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
384       }
385     }
386   }
387 
388   if (idx) {
389     *idx = 0;
390     if (*nz) {
391       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
392       for ( i=0; i<M; i++ ) { /* for each block in the block row */
393         idx_i = *idx + i*bs;
394         itmp  = bs*aj[ai[bn] + i];
395         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
396       }
397     }
398   }
399   return 0;
400 }
401 
402 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
403 {
404   if (idx) {if (*idx) PetscFree(*idx);}
405   if (v)   {if (*v)   PetscFree(*v);}
406   return 0;
407 }
408 
409 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
410 {
411   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
412   Mat         C;
413   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
414   int         *rows,*cols,bs2=a->bs2;
415   Scalar      *array=a->a;
416 
417   if (B==PETSC_NULL && mbs!=nbs)
418     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
419   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
420   PetscMemzero(col,(1+nbs)*sizeof(int));
421 
422   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
423   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
424   PetscFree(col);
425   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
426   cols = rows + bs;
427   for ( i=0; i<mbs; i++ ) {
428     cols[0] = i*bs;
429     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
430     len = ai[i+1] - ai[i];
431     for ( j=0; j<len; j++ ) {
432       rows[0] = (*aj++)*bs;
433       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
434       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
435       array += bs2;
436     }
437   }
438   PetscFree(rows);
439 
440   ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
441   ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
442 
443   if (B != PETSC_NULL) {
444     *B = C;
445   } else {
446     /* This isn't really an in-place transpose */
447     PetscFree(a->a);
448     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
449     if (a->diag) PetscFree(a->diag);
450     if (a->ilen) PetscFree(a->ilen);
451     if (a->imax) PetscFree(a->imax);
452     if (a->solve_work) PetscFree(a->solve_work);
453     PetscFree(a);
454     PetscMemcpy(A,C,sizeof(struct _Mat));
455     PetscHeaderDestroy(C);
456   }
457   return 0;
458 }
459 
460 
461 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
462 {
463   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
464   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
465   int        m = a->m,*ip, N, *ailen = a->ilen;
466   int        mbs = a->mbs, bs2 = a->bs2;
467   Scalar     *aa = a->a, *ap;
468 
469   if (mode == FLUSH_ASSEMBLY) return 0;
470 
471   for ( i=1; i<mbs; i++ ) {
472     /* move each row back by the amount of empty slots (fshift) before it*/
473     fshift += imax[i-1] - ailen[i-1];
474     if (fshift) {
475       ip = aj + ai[i]; ap = aa + bs2*ai[i];
476       N = ailen[i];
477       for ( j=0; j<N; j++ ) {
478         ip[j-fshift] = ip[j];
479         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
480       }
481     }
482     ai[i] = ai[i-1] + ailen[i-1];
483   }
484   if (mbs) {
485     fshift += imax[mbs-1] - ailen[mbs-1];
486     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
487   }
488   /* reset ilen and imax for each row */
489   for ( i=0; i<mbs; i++ ) {
490     ailen[i] = imax[i] = ai[i+1] - ai[i];
491   }
492   a->nz = ai[mbs];
493 
494   /* diagonals may have moved, so kill the diagonal pointers */
495   if (fshift && a->diag) {
496     PetscFree(a->diag);
497     PLogObjectMemory(A,-(m+1)*sizeof(int));
498     a->diag = 0;
499   }
500   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space(blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs);
501   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n",
502            a->reallocs);
503   return 0;
504 }
505 
506 static int MatZeroEntries_SeqBAIJ(Mat A)
507 {
508   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
509   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
510   return 0;
511 }
512 
513 int MatDestroy_SeqBAIJ(PetscObject obj)
514 {
515   Mat         A  = (Mat) obj;
516   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
517 
518 #if defined(PETSC_LOG)
519   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
520 #endif
521   PetscFree(a->a);
522   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
523   if (a->diag) PetscFree(a->diag);
524   if (a->ilen) PetscFree(a->ilen);
525   if (a->imax) PetscFree(a->imax);
526   if (a->solve_work) PetscFree(a->solve_work);
527   if (a->mult_work) PetscFree(a->mult_work);
528   PetscFree(a);
529   PLogObjectDestroy(A);
530   PetscHeaderDestroy(A);
531   return 0;
532 }
533 
534 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
535 {
536   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
537   if      (op == ROW_ORIENTED)              a->roworiented = 1;
538   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
539   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
540   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
541   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
542   else if (op == ROWS_SORTED ||
543            op == SYMMETRIC_MATRIX ||
544            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
545            op == YES_NEW_DIAGONALS)
546     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
547   else if (op == NO_NEW_DIAGONALS)
548     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
549   else
550     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
551   return 0;
552 }
553 
554 
555 /* -------------------------------------------------------*/
556 /* Should check that shapes of vectors and matrices match */
557 /* -------------------------------------------------------*/
558 #include "pinclude/plapack.h"
559 
560 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec zz)
561 {
562   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
563   Scalar          *xg,*zg;
564   register Scalar *x,*z,*v,sum,*xb,sum1,sum2,sum3,sum4,sum5;
565   register Scalar x1,x2,x3,x4,x5;
566   int             mbs=a->mbs,i,*idx,*ii;
567   int             bs=a->bs,j,n,bs2=a->bs2,ierr;
568 
569   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
570   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
571 
572   idx   = a->j;
573   v     = a->a;
574   ii    = a->i;
575 
576   switch (bs) {
577   case 1:
578     for ( i=0; i<mbs; i++ ) {
579       n    = ii[1] - ii[0]; ii++;
580       sum  = 0.0;
581       while (n--) sum += *v++ * x[*idx++];
582       z[i] = sum;
583     }
584     break;
585   case 2:
586     for ( i=0; i<mbs; i++ ) {
587       n  = ii[1] - ii[0]; ii++;
588       sum1 = 0.0; sum2 = 0.0;
589       for ( j=0; j<n; j++ ) {
590         xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
591         sum1 += v[0]*x1 + v[2]*x2;
592         sum2 += v[1]*x1 + v[3]*x2;
593         v += 4;
594       }
595       z[0] = sum1; z[1] = sum2;
596       z += 2;
597     }
598     break;
599   case 3:
600     for ( i=0; i<mbs; i++ ) {
601       n  = ii[1] - ii[0]; ii++;
602       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
603       for ( j=0; j<n; j++ ) {
604         xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
605         sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
606         sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
607         sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
608         v += 9;
609       }
610       z[0] = sum1; z[1] = sum2; z[2] = sum3;
611       z += 3;
612     }
613     break;
614   case 4:
615     for ( i=0; i<mbs; i++ ) {
616       n  = ii[1] - ii[0]; ii++;
617       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
618       for ( j=0; j<n; j++ ) {
619         xb = x + 4*(*idx++);
620         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
621         sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
622         sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
623         sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
624         sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
625         v += 16;
626       }
627       z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
628       z += 4;
629     }
630     break;
631   case 5:
632     for ( i=0; i<mbs; i++ ) {
633       n  = ii[1] - ii[0]; ii++;
634       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
635       for ( j=0; j<n; j++ ) {
636         xb = x + 5*(*idx++);
637         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
638         sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
639         sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
640         sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
641         sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
642         sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
643         v += 25;
644       }
645       z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
646       z += 5;
647     }
648     break;
649     /* block sizes larger then 5 by 5 are handled by BLAS */
650   default: {
651       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt, _DZero = 0.0;
652       if (!a->mult_work) {
653         k = PetscMax(a->m,a->n);
654         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
655         CHKPTRQ(a->mult_work);
656       }
657       work = a->mult_work;
658       for ( i=0; i<mbs; i++ ) {
659         n     = ii[1] - ii[0]; ii++;
660         ncols = n*bs;
661         workt = work;
662         for ( j=0; j<n; j++ ) {
663           xb = x + bs*(*idx++);
664           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
665           workt += bs;
666         }
667         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
668         v += n*bs2;
669         z += bs;
670       }
671     }
672   }
673   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
674   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
675   PLogFlops(2*a->nz*bs2 - a->m);
676   return 0;
677 }
678 
679 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
680 {
681   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
682   Scalar          *xg,*zg,*zb;
683   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
684   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval;
685   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
686 
687 
688   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
689   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
690 
691   idx   = a->j;
692   v     = a->a;
693   ii    = a->i;
694 
695   switch (bs) {
696   case 1:
697     for ( i=0; i<mbs; i++ ) {
698       n  = ii[1] - ii[0]; ii++;
699       xb = x + i; x1 = xb[0];
700       ib = idx + ai[i];
701       for ( j=0; j<n; j++ ) {
702         z[ib[j]] = *v++ * x1;
703       }
704     }
705     break;
706   case 2:
707     for ( i=0; i<mbs; i++ ) {
708       n  = ii[1] - ii[0]; ii++;
709       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
710       ib = idx + ai[i];
711       for ( j=0; j<n; j++ ) {
712         rval = ib[j]*2;
713         z[rval++] = v[0]*x1 + v[1]*x2;
714         z[rval++] = v[2]*x1 + v[3]*x2;
715         v += 4;
716       }
717     }
718     break;
719   case 3:
720     for ( i=0; i<mbs; i++ ) {
721       n  = ii[1] - ii[0]; ii++;
722       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
723       ib = idx + ai[i];
724       for ( j=0; j<n; j++ ) {
725         rval = ib[j]*3;
726         z[rval++] = v[0]*x1 + v[1]*x2 + v[2]*x3;
727         z[rval++] = v[3]*x1 + v[4]*x2 + v[5]*x3;
728         z[rval++] = v[6]*x1 + v[7]*x2 + v[8]*x3;
729         v += 9;
730       }
731     }
732     break;
733   case 4:
734     for ( i=0; i<mbs; i++ ) {
735       n  = ii[1] - ii[0]; ii++;
736       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
737       ib = idx + ai[i];
738       for ( j=0; j<n; j++ ) {
739         rval = ib[j]*4;
740         z[rval++] =  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
741         z[rval++] =  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
742         z[rval++] =  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
743         z[rval++] = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
744         v += 16;
745       }
746     }
747     break;
748   case 5:
749     for ( i=0; i<mbs; i++ ) {
750       n  = ii[1] - ii[0]; ii++;
751       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
752       x4 = xb[3];   x5 = xb[4];
753       ib = idx + ai[i];
754       for ( j=0; j<n; j++ ) {
755         rval = ib[j]*5;
756         z[rval++] =  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
757         z[rval++] =  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
758         z[rval++] = v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
759         z[rval++] = v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
760         z[rval++] = v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
761         v += 25;
762       }
763     }
764     break;
765       /* block sizes larger then 5 by 5 are handled by BLAS */
766     default: {
767       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
768       if (!a->mult_work) {
769         k = PetscMax(a->m,a->n);
770         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
771         CHKPTRQ(a->mult_work);
772       }
773       work = a->mult_work;
774       for ( i=0; i<mbs; i++ ) {
775         n     = ii[1] - ii[0]; ii++;
776         ncols = n*bs;
777         PetscMemzero(work,ncols*sizeof(Scalar));
778         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
779         v += n*bs2;
780         x += bs;
781         workt = work;
782         for ( j=0; j<n; j++ ) {
783           zb = z + bs*(*idx++);
784           for ( k=0; k<bs; k++ ) zb[k] = workt[k] ;
785           workt += bs;
786         }
787       }
788     }
789   }
790   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
791   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
792   return 0;
793 }
794 
795 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
796 {
797   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
798   if (nz)  *nz  = a->bs2*a->nz;
799   if (nza) *nza = a->maxnz;
800   if (mem) *mem = (int)A->mem;
801   return 0;
802 }
803 
804 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
805 {
806   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
807 
808   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
809 
810   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
811   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
812       (a->nz != b->nz)) {
813     *flg = PETSC_FALSE; return 0;
814   }
815 
816   /* if the a->i are the same */
817   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
818     *flg = PETSC_FALSE; return 0;
819   }
820 
821   /* if a->j are the same */
822   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
823     *flg = PETSC_FALSE; return 0;
824   }
825 
826   /* if a->a are the same */
827   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
828     *flg = PETSC_FALSE; return 0;
829   }
830   *flg = PETSC_TRUE;
831   return 0;
832 
833 }
834 
835 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
836 {
837   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
838   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
839   Scalar      *x, zero = 0.0,*aa,*aa_j;
840 
841   bs   = a->bs;
842   aa   = a->a;
843   ai   = a->i;
844   aj   = a->j;
845   ambs = a->mbs;
846   bs2  = a->bs2;
847 
848   VecSet(&zero,v);
849   VecGetArray(v,&x); VecGetLocalSize(v,&n);
850   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
851   for ( i=0; i<ambs; i++ ) {
852     for ( j=ai[i]; j<ai[i+1]; j++ ) {
853       if (aj[j] == i) {
854         row  = i*bs;
855         aa_j = aa+j*bs2;
856         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
857         break;
858       }
859     }
860   }
861   return 0;
862 }
863 
864 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
865 {
866   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
867   Scalar      *l,*r,x,*v,*aa,*li,*ri;
868   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
869 
870   ai  = a->i;
871   aj  = a->j;
872   aa  = a->a;
873   m   = a->m;
874   n   = a->n;
875   bs  = a->bs;
876   mbs = a->mbs;
877   bs2 = a->bs2;
878   if (ll) {
879     VecGetArray(ll,&l); VecGetSize(ll,&lm);
880     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
881     for ( i=0; i<mbs; i++ ) { /* for each block row */
882       M  = ai[i+1] - ai[i];
883       li = l + i*bs;
884       v  = aa + bs2*ai[i];
885       for ( j=0; j<M; j++ ) { /* for each block */
886         for ( k=0; k<bs2; k++ ) {
887           (*v++) *= li[k%bs];
888         }
889       }
890     }
891   }
892 
893   if (rr) {
894     VecGetArray(rr,&r); VecGetSize(rr,&rn);
895     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
896     for ( i=0; i<mbs; i++ ) { /* for each block row */
897       M  = ai[i+1] - ai[i];
898       v  = aa + bs2*ai[i];
899       for ( j=0; j<M; j++ ) { /* for each block */
900         ri = r + bs*aj[ai[i]+j];
901         for ( k=0; k<bs; k++ ) {
902           x = ri[k];
903           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
904         }
905       }
906     }
907   }
908   return 0;
909 }
910 
911 
912 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
913 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
914 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
915 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
916 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
917 
918 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
919 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
920 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
921 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
922 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
923 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
924 
925 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
926 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
927 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
928 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
929 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
930 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
931 
932 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
933 {
934   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
935   Scalar      *v = a->a;
936   double      sum = 0.0;
937   int         i,nz=a->nz,bs2=a->bs2;
938 
939   if (type == NORM_FROBENIUS) {
940     for (i=0; i< bs2*nz; i++ ) {
941 #if defined(PETSC_COMPLEX)
942       sum += real(conj(*v)*(*v)); v++;
943 #else
944       sum += (*v)*(*v); v++;
945 #endif
946     }
947     *norm = sqrt(sum);
948   }
949   else {
950     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
951   }
952   return 0;
953 }
954 
955 /*
956      note: This can only work for identity for row and col. It would
957    be good to check this and otherwise generate an error.
958 */
959 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
960 {
961   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
962   Mat         outA;
963   int         ierr;
964 
965   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
966 
967   outA          = inA;
968   inA->factor   = FACTOR_LU;
969   a->row        = row;
970   a->col        = col;
971 
972   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
973 
974   if (!a->diag) {
975     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
976   }
977   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
978   return 0;
979 }
980 
981 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
982 {
983   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
984   int         one = 1, totalnz = a->bs2*a->nz;
985   BLscal_( &totalnz, alpha, a->a, &one );
986   PLogFlops(totalnz);
987   return 0;
988 }
989 
990 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
991 {
992   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
993   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
994   int        *ai = a->i, *ailen = a->ilen;
995   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
996   Scalar     *ap, *aa = a->a, zero = 0.0;
997 
998   for ( k=0; k<m; k++ ) { /* loop over rows */
999     row  = im[k]; brow = row/bs;
1000     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
1001     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1002     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1003     nrow = ailen[brow];
1004     for ( l=0; l<n; l++ ) { /* loop over columns */
1005       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1006       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1007       col  = in[l] ;
1008       bcol = col/bs;
1009       cidx = col%bs;
1010       ridx = row%bs;
1011       high = nrow;
1012       low  = 0; /* assume unsorted */
1013       while (high-low > 5) {
1014         t = (low+high)/2;
1015         if (rp[t] > bcol) high = t;
1016         else             low  = t;
1017       }
1018       for ( i=low; i<high; i++ ) {
1019         if (rp[i] > bcol) break;
1020         if (rp[i] == bcol) {
1021           *v++ = ap[bs2*i+bs*cidx+ridx];
1022           goto finished;
1023         }
1024       }
1025       *v++ = zero;
1026       finished:;
1027     }
1028   }
1029   return 0;
1030 }
1031 
1032 /* -------------------------------------------------------------------*/
1033 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1034        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1035        MatMult_SeqBAIJ,0,
1036        MatMultTrans_SeqBAIJ,0,
1037        MatSolve_SeqBAIJ_N,0,
1038        0,0,
1039        MatLUFactor_SeqBAIJ,0,
1040        0,
1041        MatTranspose_SeqBAIJ,
1042        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1043        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1044        0,MatAssemblyEnd_SeqBAIJ,
1045        0,
1046        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
1047        MatGetReordering_SeqBAIJ,
1048        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1049        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1050        MatILUFactorSymbolic_SeqBAIJ,0,
1051        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1052        MatGetSubMatrix_SeqBAIJ,0,
1053        MatConvertSameType_SeqBAIJ,0,0,
1054        MatILUFactor_SeqBAIJ,0,0,
1055        0,MatIncreaseOverlap_SeqBAIJ,
1056        MatGetValues_SeqBAIJ,0,
1057        0,MatScale_SeqBAIJ,
1058        0};
1059 
1060 /*@C
1061    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1062    compressed row) format.  For good matrix assembly performance the
1063    user should preallocate the matrix storage by setting the parameter nz
1064    (or nzz).  By setting these parameters accurately, performance can be
1065    increased by more than a factor of 50.
1066 
1067    Input Parameters:
1068 .  comm - MPI communicator, set to MPI_COMM_SELF
1069 .  bs - size of block
1070 .  m - number of rows
1071 .  n - number of columns
1072 .  nz - number of block nonzeros per block row (same for all rows)
1073 .  nzz - number of block nonzeros per block row or PETSC_NULL
1074          (possibly different for each row)
1075 
1076    Output Parameter:
1077 .  A - the matrix
1078 
1079    Notes:
1080    The block AIJ format is fully compatible with standard Fortran 77
1081    storage.  That is, the stored row and column indices can begin at
1082    either one (as in Fortran) or zero.  See the users' manual for details.
1083 
1084    Specify the preallocated storage with either nz or nnz (not both).
1085    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1086    allocation.  For additional details, see the users manual chapter on
1087    matrices and the file $(PETSC_DIR)/Performance.
1088 
1089 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1090 @*/
1091 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1092 {
1093   Mat         B;
1094   Mat_SeqBAIJ *b;
1095   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1096 
1097   if (mbs*bs!=m || nbs*bs!=n)
1098     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1099 
1100   *A = 0;
1101   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1102   PLogObjectCreate(B);
1103   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1104   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1105   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1106   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1107   if (!flg) {
1108     switch (bs) {
1109       case 1:
1110         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1111         B->ops.solve           = MatSolve_SeqBAIJ_1;
1112         break;
1113       case 2:
1114         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1115         B->ops.solve           = MatSolve_SeqBAIJ_2;
1116         break;
1117       case 3:
1118         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1119         B->ops.solve           = MatSolve_SeqBAIJ_3;
1120         break;
1121       case 4:
1122         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1123         B->ops.solve           = MatSolve_SeqBAIJ_4;
1124         break;
1125       case 5:
1126         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1127         B->ops.solve           = MatSolve_SeqBAIJ_5;
1128         break;
1129     }
1130   }
1131   B->destroy          = MatDestroy_SeqBAIJ;
1132   B->view             = MatView_SeqBAIJ;
1133   B->factor           = 0;
1134   B->lupivotthreshold = 1.0;
1135   b->row              = 0;
1136   b->col              = 0;
1137   b->reallocs         = 0;
1138 
1139   b->m       = m; B->m = m; B->M = m;
1140   b->n       = n; B->n = n; B->N = n;
1141   b->mbs     = mbs;
1142   b->nbs     = nbs;
1143   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1144   if (nnz == PETSC_NULL) {
1145     if (nz == PETSC_DEFAULT) nz = 5;
1146     else if (nz <= 0)        nz = 1;
1147     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1148     nz = nz*mbs;
1149   }
1150   else {
1151     nz = 0;
1152     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1153   }
1154 
1155   /* allocate the matrix space */
1156   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1157   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1158   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1159   b->j  = (int *) (b->a + nz*bs2);
1160   PetscMemzero(b->j,nz*sizeof(int));
1161   b->i  = b->j + nz;
1162   b->singlemalloc = 1;
1163 
1164   b->i[0] = 0;
1165   for (i=1; i<mbs+1; i++) {
1166     b->i[i] = b->i[i-1] + b->imax[i-1];
1167   }
1168 
1169   /* b->ilen will count nonzeros in each block row so far. */
1170   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1171   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1172   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1173 
1174   b->bs               = bs;
1175   b->bs2              = bs2;
1176   b->mbs              = mbs;
1177   b->nz               = 0;
1178   b->maxnz            = nz;
1179   b->sorted           = 0;
1180   b->roworiented      = 1;
1181   b->nonew            = 0;
1182   b->diag             = 0;
1183   b->solve_work       = 0;
1184   b->mult_work        = 0;
1185   b->spptr            = 0;
1186   *A = B;
1187   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1188   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1189   return 0;
1190 }
1191 
1192 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1193 {
1194   Mat         C;
1195   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1196   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1197 
1198   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1199 
1200   *B = 0;
1201   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1202   PLogObjectCreate(C);
1203   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1204   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1205   C->destroy    = MatDestroy_SeqBAIJ;
1206   C->view       = MatView_SeqBAIJ;
1207   C->factor     = A->factor;
1208   c->row        = 0;
1209   c->col        = 0;
1210   C->assembled  = PETSC_TRUE;
1211 
1212   c->m = C->m   = a->m;
1213   c->n = C->n   = a->n;
1214   C->M          = a->m;
1215   C->N          = a->n;
1216 
1217   c->bs         = a->bs;
1218   c->bs2        = a->bs2;
1219   c->mbs        = a->mbs;
1220   c->nbs        = a->nbs;
1221 
1222   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1223   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1224   for ( i=0; i<mbs; i++ ) {
1225     c->imax[i] = a->imax[i];
1226     c->ilen[i] = a->ilen[i];
1227   }
1228 
1229   /* allocate the matrix space */
1230   c->singlemalloc = 1;
1231   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1232   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1233   c->j  = (int *) (c->a + nz*bs2);
1234   c->i  = c->j + nz;
1235   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1236   if (mbs > 0) {
1237     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1238     if (cpvalues == COPY_VALUES) {
1239       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1240     }
1241   }
1242 
1243   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1244   c->sorted      = a->sorted;
1245   c->roworiented = a->roworiented;
1246   c->nonew       = a->nonew;
1247 
1248   if (a->diag) {
1249     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1250     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1251     for ( i=0; i<mbs; i++ ) {
1252       c->diag[i] = a->diag[i];
1253     }
1254   }
1255   else c->diag          = 0;
1256   c->nz                 = a->nz;
1257   c->maxnz              = a->maxnz;
1258   c->solve_work         = 0;
1259   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1260   c->mult_work          = 0;
1261   *B = C;
1262   return 0;
1263 }
1264 
1265 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1266 {
1267   Mat_SeqBAIJ  *a;
1268   Mat          B;
1269   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1270   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1271   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1272   int          *masked, nmask,tmp,bs2,ishift;
1273   Scalar       *aa;
1274   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1275 
1276   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1277   bs2  = bs*bs;
1278 
1279   MPI_Comm_size(comm,&size);
1280   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1281   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1282   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1283   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1284   M = header[1]; N = header[2]; nz = header[3];
1285 
1286   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1287 
1288   /*
1289      This code adds extra rows to make sure the number of rows is
1290     divisible by the blocksize
1291   */
1292   mbs        = M/bs;
1293   extra_rows = bs - M + bs*(mbs);
1294   if (extra_rows == bs) extra_rows = 0;
1295   else                  mbs++;
1296   if (extra_rows) {
1297     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
1298   }
1299 
1300   /* read in row lengths */
1301   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1302   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1303   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1304 
1305   /* read in column indices */
1306   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1307   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1308   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1309 
1310   /* loop over row lengths determining block row lengths */
1311   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1312   PetscMemzero(browlengths,mbs*sizeof(int));
1313   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1314   PetscMemzero(mask,mbs*sizeof(int));
1315   masked = mask + mbs;
1316   rowcount = 0; nzcount = 0;
1317   for ( i=0; i<mbs; i++ ) {
1318     nmask = 0;
1319     for ( j=0; j<bs; j++ ) {
1320       kmax = rowlengths[rowcount];
1321       for ( k=0; k<kmax; k++ ) {
1322         tmp = jj[nzcount++]/bs;
1323         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1324       }
1325       rowcount++;
1326     }
1327     browlengths[i] += nmask;
1328     /* zero out the mask elements we set */
1329     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1330   }
1331 
1332   /* create our matrix */
1333   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1334          CHKERRQ(ierr);
1335   B = *A;
1336   a = (Mat_SeqBAIJ *) B->data;
1337 
1338   /* set matrix "i" values */
1339   a->i[0] = 0;
1340   for ( i=1; i<= mbs; i++ ) {
1341     a->i[i]      = a->i[i-1] + browlengths[i-1];
1342     a->ilen[i-1] = browlengths[i-1];
1343   }
1344   a->nz         = 0;
1345   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1346 
1347   /* read in nonzero values */
1348   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1349   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
1350   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1351 
1352   /* set "a" and "j" values into matrix */
1353   nzcount = 0; jcount = 0;
1354   for ( i=0; i<mbs; i++ ) {
1355     nzcountb = nzcount;
1356     nmask    = 0;
1357     for ( j=0; j<bs; j++ ) {
1358       kmax = rowlengths[i*bs+j];
1359       for ( k=0; k<kmax; k++ ) {
1360         tmp = jj[nzcount++]/bs;
1361 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1362       }
1363       rowcount++;
1364     }
1365     /* sort the masked values */
1366     PetscSortInt(nmask,masked);
1367 
1368     /* set "j" values into matrix */
1369     maskcount = 1;
1370     for ( j=0; j<nmask; j++ ) {
1371       a->j[jcount++]  = masked[j];
1372       mask[masked[j]] = maskcount++;
1373     }
1374     /* set "a" values into matrix */
1375     ishift = bs2*a->i[i];
1376     for ( j=0; j<bs; j++ ) {
1377       kmax = rowlengths[i*bs+j];
1378       for ( k=0; k<kmax; k++ ) {
1379         tmp    = jj[nzcountb]/bs ;
1380         block  = mask[tmp] - 1;
1381         point  = jj[nzcountb] - bs*tmp;
1382         idx    = ishift + bs2*block + j + bs*point;
1383         a->a[idx] = aa[nzcountb++];
1384       }
1385     }
1386     /* zero out the mask elements we set */
1387     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1388   }
1389   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1390 
1391   PetscFree(rowlengths);
1392   PetscFree(browlengths);
1393   PetscFree(aa);
1394   PetscFree(jj);
1395   PetscFree(mask);
1396 
1397   B->assembled = PETSC_TRUE;
1398 
1399   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1400   if (flg) {
1401     Viewer tviewer;
1402     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1403     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
1404     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1405     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1406   }
1407   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1408   if (flg) {
1409     Viewer tviewer;
1410     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1411     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
1412     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1413     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1414   }
1415   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1416   if (flg) {
1417     Viewer tviewer;
1418     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1419     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1420     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1421   }
1422   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1423   if (flg) {
1424     Viewer tviewer;
1425     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1426     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
1427     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1428     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1429   }
1430   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1431   if (flg) {
1432     Viewer tviewer;
1433     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
1434     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
1435     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
1436     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1437   }
1438   return 0;
1439 }
1440 
1441 
1442 
1443