xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 2a136a1a354052aebd0b0c413dfefd7fda97c4da)
1 /*$Id: baij.c,v 1.199 2000/02/02 20:09:09 bsmith Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "sys.h"
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNC__
18 #define __FUNC__ "MatMissingDiagonal_SeqBAIJ"
19 int MatMissingDiagonal_SeqBAIJ(Mat A)
20 {
21   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) {
29       SETERRQ1(1,1,"Matrix is missing diagonal number %d",i);
30     }
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 #undef __FUNC__
36 #define __FUNC__ "MatMarkDiagonal_SeqBAIJ"
37 int MatMarkDiagonal_SeqBAIJ(Mat A)
38 {
39   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
40   int         i,j,*diag,m = a->mbs;
41 
42   PetscFunctionBegin;
43   if (a->diag) PetscFunctionReturn(0);
44 
45   diag = (int*)PetscMalloc((m+1)*sizeof(int));CHKPTRQ(diag);
46   PLogObjectMemory(A,(m+1)*sizeof(int));
47   for (i=0; i<m; i++) {
48     diag[i] = a->i[i+1];
49     for (j=a->i[i]; j<a->i[i+1]; j++) {
50       if (a->j[j] == i) {
51         diag[i] = j;
52         break;
53       }
54     }
55   }
56   a->diag = diag;
57   PetscFunctionReturn(0);
58 }
59 
60 
61 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
62 
63 #undef __FUNC__
64 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
65 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
66 {
67   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
68   int         ierr,n = a->mbs,i;
69 
70   PetscFunctionBegin;
71   *nn = n;
72   if (!ia) PetscFunctionReturn(0);
73   if (symmetric) {
74     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
75   } else if (oshift == 1) {
76     /* temporarily add 1 to i and j indices */
77     int nz = a->i[n] + 1;
78     for (i=0; i<nz; i++) a->j[i]++;
79     for (i=0; i<n+1; i++) a->i[i]++;
80     *ia = a->i; *ja = a->j;
81   } else {
82     *ia = a->i; *ja = a->j;
83   }
84 
85   PetscFunctionReturn(0);
86 }
87 
88 #undef __FUNC__
89 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
90 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
91                                 PetscTruth *done)
92 {
93   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
94   int         i,n = a->mbs,ierr;
95 
96   PetscFunctionBegin;
97   if (!ia) PetscFunctionReturn(0);
98   if (symmetric) {
99     ierr = PetscFree(*ia);CHKERRQ(ierr);
100     ierr = PetscFree(*ja);CHKERRQ(ierr);
101   } else if (oshift == 1) {
102     int nz = a->i[n];
103     for (i=0; i<nz; i++) a->j[i]--;
104     for (i=0; i<n+1; i++) a->i[i]--;
105   }
106   PetscFunctionReturn(0);
107 }
108 
109 #undef __FUNC__
110 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
111 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
112 {
113   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
114 
115   PetscFunctionBegin;
116   *bs = baij->bs;
117   PetscFunctionReturn(0);
118 }
119 
120 
121 #undef __FUNC__
122 #define __FUNC__ "MatDestroy_SeqBAIJ"
123 int MatDestroy_SeqBAIJ(Mat A)
124 {
125   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
126   int         ierr;
127 
128   PetscFunctionBegin;
129   if (A->mapping) {
130     ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr);
131   }
132   if (A->bmapping) {
133     ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr);
134   }
135   if (A->rmap) {
136     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
137   }
138   if (A->cmap) {
139     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
140   }
141 #if defined(PETSC_USE_LOG)
142   PLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
143 #endif
144   ierr = PetscFree(a->a);CHKERRQ(ierr);
145   if (!a->singlemalloc) {
146     ierr = PetscFree(a->i);CHKERRQ(ierr);
147     ierr = PetscFree(a->j);CHKERRQ(ierr);
148   }
149   if (a->row) {
150     ierr = ISDestroy(a->row);CHKERRQ(ierr);
151   }
152   if (a->col) {
153     ierr = ISDestroy(a->col);CHKERRQ(ierr);
154   }
155   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
156   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
157   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
158   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
159   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
160   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
161   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
162   ierr = PetscFree(a);CHKERRQ(ierr);
163   PLogObjectDestroy(A);
164   PetscHeaderDestroy(A);
165   PetscFunctionReturn(0);
166 }
167 
168 #undef __FUNC__
169 #define __FUNC__ "MatSetOption_SeqBAIJ"
170 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
171 {
172   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
173 
174   PetscFunctionBegin;
175   if      (op == MAT_ROW_ORIENTED)                 a->roworiented    = PETSC_TRUE;
176   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented    = PETSC_FALSE;
177   else if (op == MAT_COLUMNS_SORTED)               a->sorted         = PETSC_TRUE;
178   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted         = PETSC_FALSE;
179   else if (op == MAT_KEEP_ZEROED_ROWS)             a->keepzeroedrows = PETSC_TRUE;
180   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew          = 1;
181   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew          = -1;
182   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew          = -2;
183   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew          = 0;
184   else if (op == MAT_ROWS_SORTED ||
185            op == MAT_ROWS_UNSORTED ||
186            op == MAT_SYMMETRIC ||
187            op == MAT_STRUCTURALLY_SYMMETRIC ||
188            op == MAT_YES_NEW_DIAGONALS ||
189            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
190            op == MAT_USE_HASH_TABLE) {
191     PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
192   } else if (op == MAT_NO_NEW_DIAGONALS) {
193     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
194   } else {
195     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
196   }
197   PetscFunctionReturn(0);
198 }
199 
200 
201 #undef __FUNC__
202 #define __FUNC__ "MatGetSize_SeqBAIJ"
203 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
204 {
205   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
206 
207   PetscFunctionBegin;
208   if (m) *m = a->m;
209   if (n) *n = a->n;
210   PetscFunctionReturn(0);
211 }
212 
213 #undef __FUNC__
214 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
215 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
216 {
217   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
218 
219   PetscFunctionBegin;
220   if (m) *m = 0;
221   if (n) *n = a->m;
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNC__
226 #define __FUNC__ "MatGetRow_SeqBAIJ"
227 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
228 {
229   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
230   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
231   MatScalar    *aa,*aa_i;
232   Scalar       *v_i;
233 
234   PetscFunctionBegin;
235   bs  = a->bs;
236   ai  = a->i;
237   aj  = a->j;
238   aa  = a->a;
239   bs2 = a->bs2;
240 
241   if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range");
242 
243   bn  = row/bs;   /* Block number */
244   bp  = row % bs; /* Block Position */
245   M   = ai[bn+1] - ai[bn];
246   *nz = bs*M;
247 
248   if (v) {
249     *v = 0;
250     if (*nz) {
251       *v = (Scalar*)PetscMalloc((*nz)*sizeof(Scalar));CHKPTRQ(*v);
252       for (i=0; i<M; i++) { /* for each block in the block row */
253         v_i  = *v + i*bs;
254         aa_i = aa + bs2*(ai[bn] + i);
255         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
256       }
257     }
258   }
259 
260   if (idx) {
261     *idx = 0;
262     if (*nz) {
263       *idx = (int*)PetscMalloc((*nz)*sizeof(int));CHKPTRQ(*idx);
264       for (i=0; i<M; i++) { /* for each block in the block row */
265         idx_i = *idx + i*bs;
266         itmp  = bs*aj[ai[bn] + i];
267         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
268       }
269     }
270   }
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNC__
275 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
276 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
277 {
278   int ierr;
279 
280   PetscFunctionBegin;
281   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
282   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
283   PetscFunctionReturn(0);
284 }
285 
286 #undef __FUNC__
287 #define __FUNC__ "MatTranspose_SeqBAIJ"
288 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
289 {
290   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
291   Mat         C;
292   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
293   int         *rows,*cols,bs2=a->bs2,refct;
294   Scalar      *array;
295 
296   PetscFunctionBegin;
297   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place");
298   col  = (int*)PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col);
299   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
300 
301 #if defined(PETSC_USE_MAT_SINGLE)
302   array = (Scalar *)PetscMalloc(a->bs2*a->nz*sizeof(Scalar));CHKPTRQ(array);
303   for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i];
304 #else
305   *array = a->a;
306 #endif
307 
308   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
309   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
310   ierr = PetscFree(col);CHKERRQ(ierr);
311   rows = (int*)PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows);
312   cols = rows + bs;
313   for (i=0; i<mbs; i++) {
314     cols[0] = i*bs;
315     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
316     len = ai[i+1] - ai[i];
317     for (j=0; j<len; j++) {
318       rows[0] = (*aj++)*bs;
319       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
320       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
321       array += bs2;
322     }
323   }
324   ierr = PetscFree(rows);CHKERRQ(ierr);
325 #if defined(PETSC_USE_MAT_SINGLE)
326   ierr = PetscFree(array);
327 #endif
328 
329   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
330   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
331 
332   if (B) {
333     *B = C;
334   } else {
335     PetscOps *Abops;
336     MatOps   Aops;
337 
338     /* This isn't really an in-place transpose */
339     ierr = PetscFree(a->a);CHKERRQ(ierr);
340     if (!a->singlemalloc) {
341       ierr = PetscFree(a->i);CHKERRQ(ierr);
342       ierr = PetscFree(a->j);CHKERRQ(ierr);
343     }
344     if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
345     if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
346     if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
347     if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
348     ierr = PetscFree(a);CHKERRQ(ierr);
349 
350 
351     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
352     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
353 
354     /*
355        This is horrible, horrible code. We need to keep the
356       A pointers for the bops and ops but copy everything
357       else from C.
358     */
359     Abops    = A->bops;
360     Aops     = A->ops;
361     refct    = A->refct;
362     ierr     = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr);
363     A->bops  = Abops;
364     A->ops   = Aops;
365     A->qlist = 0;
366     A->refct = refct;
367 
368     PetscHeaderDestroy(C);
369   }
370   PetscFunctionReturn(0);
371 }
372 
373 #undef __FUNC__
374 #define __FUNC__ "MatView_SeqBAIJ_Binary"
375 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
376 {
377   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
378   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
379   Scalar      *aa;
380   FILE        *file;
381 
382   PetscFunctionBegin;
383   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
384   col_lens = (int*)PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
385   col_lens[0] = MAT_COOKIE;
386 
387   col_lens[1] = a->m;
388   col_lens[2] = a->n;
389   col_lens[3] = a->nz*bs2;
390 
391   /* store lengths of each row and write (including header) to file */
392   count = 0;
393   for (i=0; i<a->mbs; i++) {
394     for (j=0; j<bs; j++) {
395       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
396     }
397   }
398   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr);
399   ierr = PetscFree(col_lens);CHKERRQ(ierr);
400 
401   /* store column indices (zero start index) */
402   jj = (int*)PetscMalloc((a->nz+1)*bs2*sizeof(int));CHKPTRQ(jj);
403   count = 0;
404   for (i=0; i<a->mbs; i++) {
405     for (j=0; j<bs; j++) {
406       for (k=a->i[i]; k<a->i[i+1]; k++) {
407         for (l=0; l<bs; l++) {
408           jj[count++] = bs*a->j[k] + l;
409         }
410       }
411     }
412   }
413   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
414   ierr = PetscFree(jj);CHKERRQ(ierr);
415 
416   /* store nonzero values */
417   aa = (Scalar*)PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa);
418   count = 0;
419   for (i=0; i<a->mbs; i++) {
420     for (j=0; j<bs; j++) {
421       for (k=a->i[i]; k<a->i[i+1]; k++) {
422         for (l=0; l<bs; l++) {
423           aa[count++] = a->a[bs2*k + l*bs + j];
424         }
425       }
426     }
427   }
428   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
429   ierr = PetscFree(aa);CHKERRQ(ierr);
430 
431   ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
432   if (file) {
433     fprintf(file,"-matload_block_size %d\n",a->bs);
434   }
435   PetscFunctionReturn(0);
436 }
437 
438 #undef __FUNC__
439 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
440 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
441 {
442   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
443   int         ierr,i,j,format,bs = a->bs,k,l,bs2=a->bs2;
444   char        *outputname;
445 
446   PetscFunctionBegin;
447   ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr);
448   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
449   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
450     ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
451   } else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
452     SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported");
453   } else if (format == VIEWER_FORMAT_ASCII_COMMON) {
454     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
455     for (i=0; i<a->mbs; i++) {
456       for (j=0; j<bs; j++) {
457         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
458         for (k=a->i[i]; k<a->i[i+1]; k++) {
459           for (l=0; l<bs; l++) {
460 #if defined(PETSC_USE_COMPLEX)
461             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
462               ierr = ViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l,
463                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
464             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
465               ierr = ViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l,
466                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
467             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
468               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
469             }
470 #else
471             if (a->a[bs2*k + l*bs + j] != 0.0) {
472               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
473             }
474 #endif
475           }
476         }
477         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
478       }
479     }
480     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
481   } else {
482     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
483     for (i=0; i<a->mbs; i++) {
484       for (j=0; j<bs; j++) {
485         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
486         for (k=a->i[i]; k<a->i[i+1]; k++) {
487           for (l=0; l<bs; l++) {
488 #if defined(PETSC_USE_COMPLEX)
489             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
490               ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
491                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
492             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
493               ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
494                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
495             } else {
496               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
497             }
498 #else
499             ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
500 #endif
501           }
502         }
503         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
504       }
505     }
506     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
507   }
508   ierr = ViewerFlush(viewer);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 #undef __FUNC__
513 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom"
514 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa)
515 {
516   Mat          A = (Mat) Aa;
517   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
518   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
519   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
520   MatScalar    *aa;
521   Viewer       viewer;
522 
523   PetscFunctionBegin;
524 
525   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
526   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
527 
528   ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
529 
530   /* loop over matrix elements drawing boxes */
531   color = DRAW_BLUE;
532   for (i=0,row=0; i<mbs; i++,row+=bs) {
533     for (j=a->i[i]; j<a->i[i+1]; j++) {
534       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
535       x_l = a->j[j]*bs; x_r = x_l + 1.0;
536       aa = a->a + j*bs2;
537       for (k=0; k<bs; k++) {
538         for (l=0; l<bs; l++) {
539           if (PetscRealPart(*aa++) >=  0.) continue;
540           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
541         }
542       }
543     }
544   }
545   color = DRAW_CYAN;
546   for (i=0,row=0; i<mbs; i++,row+=bs) {
547     for (j=a->i[i]; j<a->i[i+1]; j++) {
548       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
549       x_l = a->j[j]*bs; x_r = x_l + 1.0;
550       aa = a->a + j*bs2;
551       for (k=0; k<bs; k++) {
552         for (l=0; l<bs; l++) {
553           if (PetscRealPart(*aa++) != 0.) continue;
554           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
555         }
556       }
557     }
558   }
559 
560   color = DRAW_RED;
561   for (i=0,row=0; i<mbs; i++,row+=bs) {
562     for (j=a->i[i]; j<a->i[i+1]; j++) {
563       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
564       x_l = a->j[j]*bs; x_r = x_l + 1.0;
565       aa = a->a + j*bs2;
566       for (k=0; k<bs; k++) {
567         for (l=0; l<bs; l++) {
568           if (PetscRealPart(*aa++) <= 0.) continue;
569           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
570         }
571       }
572     }
573   }
574   PetscFunctionReturn(0);
575 }
576 
577 #undef __FUNC__
578 #define __FUNC__ "MatView_SeqBAIJ_Draw"
579 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
580 {
581   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
582   int          ierr;
583   PetscReal    xl,yl,xr,yr,w,h;
584   Draw         draw;
585   PetscTruth   isnull;
586 
587   PetscFunctionBegin;
588 
589   ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
590   ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
591 
592   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
593   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
594   xr += w;    yr += h;  xl = -w;     yl = -h;
595   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
596   ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
597   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
598   PetscFunctionReturn(0);
599 }
600 
601 #undef __FUNC__
602 #define __FUNC__ "MatView_SeqBAIJ"
603 int MatView_SeqBAIJ(Mat A,Viewer viewer)
604 {
605   int        ierr;
606   PetscTruth issocket,isascii,isbinary,isdraw;
607 
608   PetscFunctionBegin;
609   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
610   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
611   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
612   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
613   if (issocket) {
614     SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported");
615   } else if (isascii){
616     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
617   } else if (isbinary) {
618     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
619   } else if (isdraw) {
620     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
621   } else {
622     SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
623   }
624   PetscFunctionReturn(0);
625 }
626 
627 
628 #undef __FUNC__
629 #define __FUNC__ "MatGetValues_SeqBAIJ"
630 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
631 {
632   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
633   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
634   int        *ai = a->i,*ailen = a->ilen;
635   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
636   MatScalar  *ap,*aa = a->a,zero = 0.0;
637 
638   PetscFunctionBegin;
639   for (k=0; k<m; k++) { /* loop over rows */
640     row  = im[k]; brow = row/bs;
641     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
642     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
643     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
644     nrow = ailen[brow];
645     for (l=0; l<n; l++) { /* loop over columns */
646       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
647       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
648       col  = in[l] ;
649       bcol = col/bs;
650       cidx = col%bs;
651       ridx = row%bs;
652       high = nrow;
653       low  = 0; /* assume unsorted */
654       while (high-low > 5) {
655         t = (low+high)/2;
656         if (rp[t] > bcol) high = t;
657         else             low  = t;
658       }
659       for (i=low; i<high; i++) {
660         if (rp[i] > bcol) break;
661         if (rp[i] == bcol) {
662           *v++ = ap[bs2*i+bs*cidx+ridx];
663           goto finished;
664         }
665       }
666       *v++ = zero;
667       finished:;
668     }
669   }
670   PetscFunctionReturn(0);
671 }
672 
673 
674 #undef __FUNC__
675 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
676 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
677 {
678   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
679   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
680   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
681   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
682   Scalar      *value = v;
683   MatScalar   *ap,*aa=a->a,*bap;
684 
685   PetscFunctionBegin;
686   if (roworiented) {
687     stepval = (n-1)*bs;
688   } else {
689     stepval = (m-1)*bs;
690   }
691   for (k=0; k<m; k++) { /* loop over added rows */
692     row  = im[k];
693     if (row < 0) continue;
694 #if defined(PETSC_USE_BOPT_g)
695     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
696 #endif
697     rp   = aj + ai[row];
698     ap   = aa + bs2*ai[row];
699     rmax = imax[row];
700     nrow = ailen[row];
701     low  = 0;
702     for (l=0; l<n; l++) { /* loop over added columns */
703       if (in[l] < 0) continue;
704 #if defined(PETSC_USE_BOPT_g)
705       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
706 #endif
707       col = in[l];
708       if (roworiented) {
709         value = v + k*(stepval+bs)*bs + l*bs;
710       } else {
711         value = v + l*(stepval+bs)*bs + k*bs;
712       }
713       if (!sorted) low = 0; high = nrow;
714       while (high-low > 7) {
715         t = (low+high)/2;
716         if (rp[t] > col) high = t;
717         else             low  = t;
718       }
719       for (i=low; i<high; i++) {
720         if (rp[i] > col) break;
721         if (rp[i] == col) {
722           bap  = ap +  bs2*i;
723           if (roworiented) {
724             if (is == ADD_VALUES) {
725               for (ii=0; ii<bs; ii++,value+=stepval) {
726                 for (jj=ii; jj<bs2; jj+=bs) {
727                   bap[jj] += *value++;
728                 }
729               }
730             } else {
731               for (ii=0; ii<bs; ii++,value+=stepval) {
732                 for (jj=ii; jj<bs2; jj+=bs) {
733                   bap[jj] = *value++;
734                 }
735               }
736             }
737           } else {
738             if (is == ADD_VALUES) {
739               for (ii=0; ii<bs; ii++,value+=stepval) {
740                 for (jj=0; jj<bs; jj++) {
741                   *bap++ += *value++;
742                 }
743               }
744             } else {
745               for (ii=0; ii<bs; ii++,value+=stepval) {
746                 for (jj=0; jj<bs; jj++) {
747                   *bap++  = *value++;
748                 }
749               }
750             }
751           }
752           goto noinsert2;
753         }
754       }
755       if (nonew == 1) goto noinsert2;
756       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
757       if (nrow >= rmax) {
758         /* there is no extra room in row, therefore enlarge */
759         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
760         MatScalar *new_a;
761 
762         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
763 
764         /* malloc new storage space */
765         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
766         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a);
767         new_j   = (int*)(new_a + bs2*new_nz);
768         new_i   = new_j + new_nz;
769 
770         /* copy over old data into new slots */
771         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
772         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
773         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
774         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
775         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
776         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
777         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
778         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
779         /* free up old matrix storage */
780         ierr = PetscFree(a->a);CHKERRQ(ierr);
781         if (!a->singlemalloc) {
782           ierr = PetscFree(a->i);CHKERRQ(ierr);
783           ierr = PetscFree(a->j);CHKERRQ(ierr);
784         }
785         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
786         a->singlemalloc = PETSC_TRUE;
787 
788         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
789         rmax = imax[row] = imax[row] + CHUNKSIZE;
790         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
791         a->maxnz += bs2*CHUNKSIZE;
792         a->reallocs++;
793         a->nz++;
794       }
795       N = nrow++ - 1;
796       /* shift up all the later entries in this row */
797       for (ii=N; ii>=i; ii--) {
798         rp[ii+1] = rp[ii];
799         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
800       }
801       if (N >= i) {
802         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
803       }
804       rp[i] = col;
805       bap   = ap +  bs2*i;
806       if (roworiented) {
807         for (ii=0; ii<bs; ii++,value+=stepval) {
808           for (jj=ii; jj<bs2; jj+=bs) {
809             bap[jj] = *value++;
810           }
811         }
812       } else {
813         for (ii=0; ii<bs; ii++,value+=stepval) {
814           for (jj=0; jj<bs; jj++) {
815             *bap++  = *value++;
816           }
817         }
818       }
819       noinsert2:;
820       low = i;
821     }
822     ailen[row] = nrow;
823   }
824   PetscFunctionReturn(0);
825 }
826 
827 
828 #undef __FUNC__
829 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
830 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
831 {
832   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
833   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
834   int        m = a->m,*ip,N,*ailen = a->ilen;
835   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
836   MatScalar  *aa = a->a,*ap;
837 
838   PetscFunctionBegin;
839   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
840 
841   if (m) rmax = ailen[0];
842   for (i=1; i<mbs; i++) {
843     /* move each row back by the amount of empty slots (fshift) before it*/
844     fshift += imax[i-1] - ailen[i-1];
845     rmax   = PetscMax(rmax,ailen[i]);
846     if (fshift) {
847       ip = aj + ai[i]; ap = aa + bs2*ai[i];
848       N = ailen[i];
849       for (j=0; j<N; j++) {
850         ip[j-fshift] = ip[j];
851         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
852       }
853     }
854     ai[i] = ai[i-1] + ailen[i-1];
855   }
856   if (mbs) {
857     fshift += imax[mbs-1] - ailen[mbs-1];
858     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
859   }
860   /* reset ilen and imax for each row */
861   for (i=0; i<mbs; i++) {
862     ailen[i] = imax[i] = ai[i+1] - ai[i];
863   }
864   a->nz = ai[mbs];
865 
866   /* diagonals may have moved, so kill the diagonal pointers */
867   if (fshift && a->diag) {
868     ierr = PetscFree(a->diag);CHKERRQ(ierr);
869     PLogObjectMemory(A,-(m+1)*sizeof(int));
870     a->diag = 0;
871   }
872   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
873            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
874   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
875            a->reallocs);
876   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
877   a->reallocs          = 0;
878   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
879 
880   PetscFunctionReturn(0);
881 }
882 
883 
884 
885 /*
886    This function returns an array of flags which indicate the locations of contiguous
887    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
888    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
889    Assume: sizes should be long enough to hold all the values.
890 */
891 #undef __FUNC__
892 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks"
893 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
894 {
895   int        i,j,k,row;
896   PetscTruth flg;
897 
898   PetscFunctionBegin;
899   for (i=0,j=0; i<n; j++) {
900     row = idx[i];
901     if (row%bs!=0) { /* Not the begining of a block */
902       sizes[j] = 1;
903       i++;
904     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
905       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
906       i++;
907     } else { /* Begining of the block, so check if the complete block exists */
908       flg = PETSC_TRUE;
909       for (k=1; k<bs; k++) {
910         if (row+k != idx[i+k]) { /* break in the block */
911           flg = PETSC_FALSE;
912           break;
913         }
914       }
915       if (flg == PETSC_TRUE) { /* No break in the bs */
916         sizes[j] = bs;
917         i+= bs;
918       } else {
919         sizes[j] = 1;
920         i++;
921       }
922     }
923   }
924   *bs_max = j;
925   PetscFunctionReturn(0);
926 }
927 
928 #undef __FUNC__
929 #define __FUNC__ "MatZeroRows_SeqBAIJ"
930 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag)
931 {
932   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
933   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
934   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
935   Scalar      zero = 0.0;
936   MatScalar   *aa;
937 
938   PetscFunctionBegin;
939   /* Make a copy of the IS and  sort it */
940   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
941   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
942 
943   /* allocate memory for rows,sizes */
944   rows  = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows);
945   sizes = rows + is_n;
946 
947   /* initialize copy IS valurs to rows, and sort them */
948   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
949   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
950   if (baij->keepzeroedrows) {
951     for (i=0; i<is_n; i++) { sizes[i] = 1; }
952     bs_max = is_n;
953   } else {
954     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
955   }
956   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
957 
958   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
959     row   = rows[j];
960     if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row);
961     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
962     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
963     if (sizes[i] == bs && !baij->keepzeroedrows) {
964       if (diag) {
965         if (baij->ilen[row/bs] > 0) {
966           baij->ilen[row/bs] = 1;
967           baij->j[baij->i[row/bs]] = row/bs;
968           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
969         }
970         /* Now insert all the diagoanl values for this bs */
971         for (k=0; k<bs; k++) {
972           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
973         }
974       } else { /* (!diag) */
975         baij->ilen[row/bs] = 0;
976       } /* end (!diag) */
977     } else { /* (sizes[i] != bs) */
978 #if defined (PETSC_USE_DEBUG)
979       if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1");
980 #endif
981       for (k=0; k<count; k++) {
982         aa[0] = zero;
983         aa+=bs;
984       }
985       if (diag) {
986         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
987       }
988     }
989   }
990 
991   ierr = PetscFree(rows);CHKERRQ(ierr);
992   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNC__
997 #define __FUNC__ "MatSetValues_SeqBAIJ"
998 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
999 {
1000   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1001   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1002   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
1003   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1004   int         ridx,cidx,bs2=a->bs2,ierr;
1005   MatScalar   *ap,value,*aa=a->a,*bap;
1006 
1007   PetscFunctionBegin;
1008   for (k=0; k<m; k++) { /* loop over added rows */
1009     row  = im[k]; brow = row/bs;
1010     if (row < 0) continue;
1011 #if defined(PETSC_USE_BOPT_g)
1012     if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m);
1013 #endif
1014     rp   = aj + ai[brow];
1015     ap   = aa + bs2*ai[brow];
1016     rmax = imax[brow];
1017     nrow = ailen[brow];
1018     low  = 0;
1019     for (l=0; l<n; l++) { /* loop over added columns */
1020       if (in[l] < 0) continue;
1021 #if defined(PETSC_USE_BOPT_g)
1022       if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n);
1023 #endif
1024       col = in[l]; bcol = col/bs;
1025       ridx = row % bs; cidx = col % bs;
1026       if (roworiented) {
1027         value = v[l + k*n];
1028       } else {
1029         value = v[k + l*m];
1030       }
1031       if (!sorted) low = 0; high = nrow;
1032       while (high-low > 7) {
1033         t = (low+high)/2;
1034         if (rp[t] > bcol) high = t;
1035         else              low  = t;
1036       }
1037       for (i=low; i<high; i++) {
1038         if (rp[i] > bcol) break;
1039         if (rp[i] == bcol) {
1040           bap  = ap +  bs2*i + bs*cidx + ridx;
1041           if (is == ADD_VALUES) *bap += value;
1042           else                  *bap  = value;
1043           goto noinsert1;
1044         }
1045       }
1046       if (nonew == 1) goto noinsert1;
1047       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1048       if (nrow >= rmax) {
1049         /* there is no extra room in row, therefore enlarge */
1050         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1051         MatScalar *new_a;
1052 
1053         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1054 
1055         /* Malloc new storage space */
1056         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1057         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a);
1058         new_j   = (int*)(new_a + bs2*new_nz);
1059         new_i   = new_j + new_nz;
1060 
1061         /* copy over old data into new slots */
1062         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1063         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1064         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1065         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1066         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1067         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1068         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1069         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1070         /* free up old matrix storage */
1071         ierr = PetscFree(a->a);CHKERRQ(ierr);
1072         if (!a->singlemalloc) {
1073           ierr = PetscFree(a->i);CHKERRQ(ierr);
1074           ierr = PetscFree(a->j);CHKERRQ(ierr);
1075         }
1076         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1077         a->singlemalloc = PETSC_TRUE;
1078 
1079         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1080         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1081         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1082         a->maxnz += bs2*CHUNKSIZE;
1083         a->reallocs++;
1084         a->nz++;
1085       }
1086       N = nrow++ - 1;
1087       /* shift up all the later entries in this row */
1088       for (ii=N; ii>=i; ii--) {
1089         rp[ii+1] = rp[ii];
1090         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1091       }
1092       if (N>=i) {
1093         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1094       }
1095       rp[i]                      = bcol;
1096       ap[bs2*i + bs*cidx + ridx] = value;
1097       noinsert1:;
1098       low = i;
1099     }
1100     ailen[brow] = nrow;
1101   }
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,PetscReal,Mat*);
1106 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,PetscReal);
1107 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1108 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1109 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1110 extern int MatMultTranspose_SeqBAIJ(Mat,Vec,Vec);
1111 extern int MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1112 extern int MatScale_SeqBAIJ(Scalar*,Mat);
1113 extern int MatNorm_SeqBAIJ(Mat,NormType,PetscReal *);
1114 extern int MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*);
1115 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1116 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1117 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1118 extern int MatZeroEntries_SeqBAIJ(Mat);
1119 
1120 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1121 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1122 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1123 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1124 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1125 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1126 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
1127 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1128 extern int MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec);
1129 extern int MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec);
1130 extern int MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec);
1131 extern int MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec);
1132 extern int MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec);
1133 extern int MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec);
1134 extern int MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec);
1135 
1136 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1137 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1138 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1139 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1140 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1141 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1142 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*);
1143 
1144 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1145 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1146 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1147 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1148 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1149 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec);
1150 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1151 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1152 
1153 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1154 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1155 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1156 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1157 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1158 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
1159 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1160 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1161 
1162 #undef __FUNC__
1163 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1164 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1165 {
1166   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1167   Mat         outA;
1168   int         ierr;
1169   PetscTruth  row_identity,col_identity;
1170 
1171   PetscFunctionBegin;
1172   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU");
1173   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1174   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1175   if (!row_identity || !col_identity) {
1176     SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU");
1177   }
1178 
1179   outA          = inA;
1180   inA->factor   = FACTOR_LU;
1181 
1182   if (!a->diag) {
1183     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1184   }
1185   /*
1186       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1187       for ILU(0) factorization with natural ordering
1188   */
1189   switch (a->bs) {
1190   case 1:
1191     inA->ops->solvetranspose   = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering;
1192     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1193   case 2:
1194     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1195     inA->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1196     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering;
1197     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1198     break;
1199   case 3:
1200     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1201     inA->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1202     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_3_NaturalOrdering;
1203     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1204     break;
1205   case 4:
1206     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1207     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1208     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_4_NaturalOrdering;
1209     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1210     break;
1211   case 5:
1212     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1213     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1214     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering;
1215     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1216     break;
1217   case 6:
1218     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1219     inA->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1220     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_6_NaturalOrdering;
1221     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1222     break;
1223   case 7:
1224     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1225     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering;
1226     inA->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1227     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1228     break;
1229   default:
1230     a->row        = row;
1231     a->col        = col;
1232     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1233     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1234 
1235     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1236     ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1237     PLogObjectParent(inA,a->icol);
1238 
1239     if (!a->solve_work) {
1240       a->solve_work = (Scalar*)PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1241       PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1242     }
1243   }
1244 
1245   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1246 
1247   PetscFunctionReturn(0);
1248 }
1249 #undef __FUNC__
1250 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1251 int MatPrintHelp_SeqBAIJ(Mat A)
1252 {
1253   static PetscTruth called = PETSC_FALSE;
1254   MPI_Comm          comm = A->comm;
1255   int               ierr;
1256 
1257   PetscFunctionBegin;
1258   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1259   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1260   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 EXTERN_C_BEGIN
1265 #undef __FUNC__
1266 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1267 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1268 {
1269   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1270   int         i,nz,n;
1271 
1272   PetscFunctionBegin;
1273   nz = baij->maxnz;
1274   n  = baij->n;
1275   for (i=0; i<nz; i++) {
1276     baij->j[i] = indices[i];
1277   }
1278   baij->nz = nz;
1279   for (i=0; i<n; i++) {
1280     baij->ilen[i] = baij->imax[i];
1281   }
1282 
1283   PetscFunctionReturn(0);
1284 }
1285 EXTERN_C_END
1286 
1287 #undef __FUNC__
1288 #define __FUNC__ "MatSeqBAIJSetColumnIndices"
1289 /*@
1290     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1291        in the matrix.
1292 
1293   Input Parameters:
1294 +  mat - the SeqBAIJ matrix
1295 -  indices - the column indices
1296 
1297   Level: advanced
1298 
1299   Notes:
1300     This can be called if you have precomputed the nonzero structure of the
1301   matrix and want to provide it to the matrix object to improve the performance
1302   of the MatSetValues() operation.
1303 
1304     You MUST have set the correct numbers of nonzeros per row in the call to
1305   MatCreateSeqBAIJ().
1306 
1307     MUST be called before any calls to MatSetValues();
1308 
1309 @*/
1310 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1311 {
1312   int ierr,(*f)(Mat,int *);
1313 
1314   PetscFunctionBegin;
1315   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1316   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr);
1317   if (f) {
1318     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1319   } else {
1320     SETERRQ(1,1,"Wrong type of matrix to set column indices");
1321   }
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 /* -------------------------------------------------------------------*/
1326 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1327        MatGetRow_SeqBAIJ,
1328        MatRestoreRow_SeqBAIJ,
1329        MatMult_SeqBAIJ_N,
1330        MatMultAdd_SeqBAIJ_N,
1331        MatMultTranspose_SeqBAIJ,
1332        MatMultTransposeAdd_SeqBAIJ,
1333        MatSolve_SeqBAIJ_N,
1334        0,
1335        0,
1336        0,
1337        MatLUFactor_SeqBAIJ,
1338        0,
1339        0,
1340        MatTranspose_SeqBAIJ,
1341        MatGetInfo_SeqBAIJ,
1342        MatEqual_SeqBAIJ,
1343        MatGetDiagonal_SeqBAIJ,
1344        MatDiagonalScale_SeqBAIJ,
1345        MatNorm_SeqBAIJ,
1346        0,
1347        MatAssemblyEnd_SeqBAIJ,
1348        0,
1349        MatSetOption_SeqBAIJ,
1350        MatZeroEntries_SeqBAIJ,
1351        MatZeroRows_SeqBAIJ,
1352        MatLUFactorSymbolic_SeqBAIJ,
1353        MatLUFactorNumeric_SeqBAIJ_N,
1354        0,
1355        0,
1356        MatGetSize_SeqBAIJ,
1357        MatGetSize_SeqBAIJ,
1358        MatGetOwnershipRange_SeqBAIJ,
1359        MatILUFactorSymbolic_SeqBAIJ,
1360        0,
1361        0,
1362        0,
1363        MatDuplicate_SeqBAIJ,
1364        0,
1365        0,
1366        MatILUFactor_SeqBAIJ,
1367        0,
1368        0,
1369        MatGetSubMatrices_SeqBAIJ,
1370        MatIncreaseOverlap_SeqBAIJ,
1371        MatGetValues_SeqBAIJ,
1372        0,
1373        MatPrintHelp_SeqBAIJ,
1374        MatScale_SeqBAIJ,
1375        0,
1376        0,
1377        0,
1378        MatGetBlockSize_SeqBAIJ,
1379        MatGetRowIJ_SeqBAIJ,
1380        MatRestoreRowIJ_SeqBAIJ,
1381        0,
1382        0,
1383        0,
1384        0,
1385        0,
1386        0,
1387        MatSetValuesBlocked_SeqBAIJ,
1388        MatGetSubMatrix_SeqBAIJ,
1389        0,
1390        0,
1391        MatGetMaps_Petsc};
1392 
1393 EXTERN_C_BEGIN
1394 #undef __FUNC__
1395 #define __FUNC__ "MatStoreValues_SeqBAIJ"
1396 int MatStoreValues_SeqBAIJ(Mat mat)
1397 {
1398   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1399   int         nz = aij->i[aij->m]*aij->bs*aij->bs2;
1400   int         ierr;
1401 
1402   PetscFunctionBegin;
1403   if (aij->nonew != 1) {
1404     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1405   }
1406 
1407   /* allocate space for values if not already there */
1408   if (!aij->saved_values) {
1409     aij->saved_values = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values);
1410   }
1411 
1412   /* copy values over */
1413   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1414   PetscFunctionReturn(0);
1415 }
1416 EXTERN_C_END
1417 
1418 EXTERN_C_BEGIN
1419 #undef __FUNC__
1420 #define __FUNC__ "MatRetrieveValues_SeqBAIJ"
1421 int MatRetrieveValues_SeqBAIJ(Mat mat)
1422 {
1423   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1424   int         nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr;
1425 
1426   PetscFunctionBegin;
1427   if (aij->nonew != 1) {
1428     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1429   }
1430   if (!aij->saved_values) {
1431     SETERRQ(1,1,"Must call MatStoreValues(A);first");
1432   }
1433 
1434   /* copy values over */
1435   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1436   PetscFunctionReturn(0);
1437 }
1438 EXTERN_C_END
1439 
1440 #undef __FUNC__
1441 #define __FUNC__ "MatCreateSeqBAIJ"
1442 /*@C
1443    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1444    compressed row) format.  For good matrix assembly performance the
1445    user should preallocate the matrix storage by setting the parameter nz
1446    (or the array nnz).  By setting these parameters accurately, performance
1447    during matrix assembly can be increased by more than a factor of 50.
1448 
1449    Collective on MPI_Comm
1450 
1451    Input Parameters:
1452 +  comm - MPI communicator, set to PETSC_COMM_SELF
1453 .  bs - size of block
1454 .  m - number of rows
1455 .  n - number of columns
1456 .  nz - number of block nonzeros per block row (same for all rows)
1457 -  nnz - array containing the number of block nonzeros in the various block rows
1458          (possibly different for each block row) or PETSC_NULL
1459 
1460    Output Parameter:
1461 .  A - the matrix
1462 
1463    Options Database Keys:
1464 .   -mat_no_unroll - uses code that does not unroll the loops in the
1465                      block calculations (much slower)
1466 .    -mat_block_size - size of the blocks to use
1467 
1468    Level: intermediate
1469 
1470    Notes:
1471    The block AIJ format is fully compatible with standard Fortran 77
1472    storage.  That is, the stored row and column indices can begin at
1473    either one (as in Fortran) or zero.  See the users' manual for details.
1474 
1475    Specify the preallocated storage with either nz or nnz (not both).
1476    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1477    allocation.  For additional details, see the users manual chapter on
1478    matrices.
1479 
1480 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1481 @*/
1482 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1483 {
1484   Mat         B;
1485   Mat_SeqBAIJ *b;
1486   int         i,len,ierr,mbs,nbs,bs2,size;
1487   PetscTruth  flg;
1488 
1489   PetscFunctionBegin;
1490   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1491   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1");
1492 
1493   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1494   mbs  = m/bs;
1495   nbs  = n/bs;
1496   bs2  = bs*bs;
1497 
1498   if (mbs*bs!=m || nbs*bs!=n) {
1499     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize");
1500   }
1501 
1502   if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz);
1503   if (nnz) {
1504     for (i=0; i<mbs; i++) {
1505       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1506     }
1507   }
1508 
1509   *A = 0;
1510   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView);
1511   PLogObjectCreate(B);
1512   B->data = (void*)(b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b);
1513   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1514   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1515   ierr    = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1516   if (!flg) {
1517     switch (bs) {
1518     case 1:
1519       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1520       B->ops->solve           = MatSolve_SeqBAIJ_1;
1521       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_1;
1522       B->ops->mult            = MatMult_SeqBAIJ_1;
1523       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1524       break;
1525     case 2:
1526       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1527       B->ops->solve           = MatSolve_SeqBAIJ_2;
1528       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_2;
1529       B->ops->mult            = MatMult_SeqBAIJ_2;
1530       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1531       break;
1532     case 3:
1533       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1534       B->ops->solve           = MatSolve_SeqBAIJ_3;
1535       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_3;
1536       B->ops->mult            = MatMult_SeqBAIJ_3;
1537       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1538       break;
1539     case 4:
1540       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1541       B->ops->solve           = MatSolve_SeqBAIJ_4;
1542       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_4;
1543       B->ops->mult            = MatMult_SeqBAIJ_4;
1544       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1545       break;
1546     case 5:
1547       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1548       B->ops->solve           = MatSolve_SeqBAIJ_5;
1549       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_5;
1550       B->ops->mult            = MatMult_SeqBAIJ_5;
1551       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1552       break;
1553     case 6:
1554       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1555       B->ops->solve           = MatSolve_SeqBAIJ_6;
1556       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_6;
1557       B->ops->mult            = MatMult_SeqBAIJ_6;
1558       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1559       break;
1560     case 7:
1561       B->ops->mult            = MatMult_SeqBAIJ_7;
1562       B->ops->solve           = MatSolve_SeqBAIJ_7;
1563       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_7;
1564       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1565       break;
1566     }
1567   }
1568   B->ops->destroy     = MatDestroy_SeqBAIJ;
1569   B->ops->view        = MatView_SeqBAIJ;
1570   B->factor           = 0;
1571   B->lupivotthreshold = 1.0;
1572   B->mapping          = 0;
1573   b->row              = 0;
1574   b->col              = 0;
1575   b->icol             = 0;
1576   b->reallocs         = 0;
1577   b->saved_values     = 0;
1578 
1579   b->m       = m; B->m = m; B->M = m;
1580   b->n       = n; B->n = n; B->N = n;
1581 
1582   ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr);
1583   ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr);
1584 
1585   b->mbs     = mbs;
1586   b->nbs     = nbs;
1587   b->imax    = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(b->imax);
1588   if (!nnz) {
1589     if (nz == PETSC_DEFAULT) nz = 5;
1590     else if (nz <= 0)        nz = 1;
1591     for (i=0; i<mbs; i++) b->imax[i] = nz;
1592     nz = nz*mbs;
1593   } else {
1594     nz = 0;
1595     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1596   }
1597 
1598   /* allocate the matrix space */
1599   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int);
1600   b->a  = (MatScalar*)PetscMalloc(len);CHKPTRQ(b->a);
1601   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1602   b->j  = (int*)(b->a + nz*bs2);
1603   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1604   b->i  = b->j + nz;
1605   b->singlemalloc = PETSC_TRUE;
1606 
1607   b->i[0] = 0;
1608   for (i=1; i<mbs+1; i++) {
1609     b->i[i] = b->i[i-1] + b->imax[i-1];
1610   }
1611 
1612   /* b->ilen will count nonzeros in each block row so far. */
1613   b->ilen = (int*)PetscMalloc((mbs+1)*sizeof(int));
1614   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1615   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1616 
1617   b->bs               = bs;
1618   b->bs2              = bs2;
1619   b->mbs              = mbs;
1620   b->nz               = 0;
1621   b->maxnz            = nz*bs2;
1622   b->sorted           = PETSC_FALSE;
1623   b->roworiented      = PETSC_TRUE;
1624   b->nonew            = 0;
1625   b->diag             = 0;
1626   b->solve_work       = 0;
1627   b->mult_work        = 0;
1628   b->spptr            = 0;
1629   B->info.nz_unneeded = (PetscReal)b->maxnz;
1630   b->keepzeroedrows   = PETSC_FALSE;
1631 
1632   *A = B;
1633   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1634   if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); }
1635 
1636   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1637                                      "MatStoreValues_SeqBAIJ",
1638                                      (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1639   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1640                                      "MatRetrieveValues_SeqBAIJ",
1641                                      (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1642   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1643                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1644                                      (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1645   PetscFunctionReturn(0);
1646 }
1647 
1648 #undef __FUNC__
1649 #define __FUNC__ "MatDuplicate_SeqBAIJ"
1650 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1651 {
1652   Mat         C;
1653   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1654   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1655 
1656   PetscFunctionBegin;
1657   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix");
1658 
1659   *B = 0;
1660   PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView);
1661   PLogObjectCreate(C);
1662   C->data           = (void*)(c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c);
1663   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1664   C->ops->destroy   = MatDestroy_SeqBAIJ;
1665   C->ops->view      = MatView_SeqBAIJ;
1666   C->factor         = A->factor;
1667   c->row            = 0;
1668   c->col            = 0;
1669   c->icol           = 0;
1670   c->saved_values   = 0;
1671   c->keepzeroedrows = a->keepzeroedrows;
1672   C->assembled      = PETSC_TRUE;
1673 
1674   c->m = C->m   = a->m;
1675   c->n = C->n   = a->n;
1676   C->M          = a->m;
1677   C->N          = a->n;
1678 
1679   c->bs         = a->bs;
1680   c->bs2        = a->bs2;
1681   c->mbs        = a->mbs;
1682   c->nbs        = a->nbs;
1683 
1684   c->imax       = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax);
1685   c->ilen       = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen);
1686   for (i=0; i<mbs; i++) {
1687     c->imax[i] = a->imax[i];
1688     c->ilen[i] = a->ilen[i];
1689   }
1690 
1691   /* allocate the matrix space */
1692   c->singlemalloc = PETSC_TRUE;
1693   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1694   c->a = (MatScalar*)PetscMalloc(len);CHKPTRQ(c->a);
1695   c->j = (int*)(c->a + nz*bs2);
1696   c->i = c->j + nz;
1697   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1698   if (mbs > 0) {
1699     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1700     if (cpvalues == MAT_COPY_VALUES) {
1701       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1702     } else {
1703       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1704     }
1705   }
1706 
1707   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1708   c->sorted      = a->sorted;
1709   c->roworiented = a->roworiented;
1710   c->nonew       = a->nonew;
1711 
1712   if (a->diag) {
1713     c->diag = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->diag);
1714     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1715     for (i=0; i<mbs; i++) {
1716       c->diag[i] = a->diag[i];
1717     }
1718   } else c->diag        = 0;
1719   c->nz                 = a->nz;
1720   c->maxnz              = a->maxnz;
1721   c->solve_work         = 0;
1722   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1723   c->mult_work          = 0;
1724   *B = C;
1725   ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 #undef __FUNC__
1730 #define __FUNC__ "MatLoad_SeqBAIJ"
1731 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1732 {
1733   Mat_SeqBAIJ  *a;
1734   Mat          B;
1735   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1736   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1737   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1738   int          *masked,nmask,tmp,bs2,ishift;
1739   Scalar       *aa;
1740   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1741 
1742   PetscFunctionBegin;
1743   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1744   bs2  = bs*bs;
1745 
1746   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1747   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor");
1748   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1749   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1750   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object");
1751   M = header[1]; N = header[2]; nz = header[3];
1752 
1753   if (header[3] < 0) {
1754     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ");
1755   }
1756 
1757   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
1758 
1759   /*
1760      This code adds extra rows to make sure the number of rows is
1761     divisible by the blocksize
1762   */
1763   mbs        = M/bs;
1764   extra_rows = bs - M + bs*(mbs);
1765   if (extra_rows == bs) extra_rows = 0;
1766   else                  mbs++;
1767   if (extra_rows) {
1768     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1769   }
1770 
1771   /* read in row lengths */
1772   rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1773   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1774   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1775 
1776   /* read in column indices */
1777   jj = (int*)PetscMalloc((nz+extra_rows)*sizeof(int));CHKPTRQ(jj);
1778   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1779   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1780 
1781   /* loop over row lengths determining block row lengths */
1782   browlengths = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1783   ierr        = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1784   mask        = (int*)PetscMalloc(2*mbs*sizeof(int));CHKPTRQ(mask);
1785   ierr        = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1786   masked      = mask + mbs;
1787   rowcount    = 0; nzcount = 0;
1788   for (i=0; i<mbs; i++) {
1789     nmask = 0;
1790     for (j=0; j<bs; j++) {
1791       kmax = rowlengths[rowcount];
1792       for (k=0; k<kmax; k++) {
1793         tmp = jj[nzcount++]/bs;
1794         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1795       }
1796       rowcount++;
1797     }
1798     browlengths[i] += nmask;
1799     /* zero out the mask elements we set */
1800     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1801   }
1802 
1803   /* create our matrix */
1804   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1805   B = *A;
1806   a = (Mat_SeqBAIJ*)B->data;
1807 
1808   /* set matrix "i" values */
1809   a->i[0] = 0;
1810   for (i=1; i<= mbs; i++) {
1811     a->i[i]      = a->i[i-1] + browlengths[i-1];
1812     a->ilen[i-1] = browlengths[i-1];
1813   }
1814   a->nz         = 0;
1815   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1816 
1817   /* read in nonzero values */
1818   aa = (Scalar*)PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1819   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1820   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1821 
1822   /* set "a" and "j" values into matrix */
1823   nzcount = 0; jcount = 0;
1824   for (i=0; i<mbs; i++) {
1825     nzcountb = nzcount;
1826     nmask    = 0;
1827     for (j=0; j<bs; j++) {
1828       kmax = rowlengths[i*bs+j];
1829       for (k=0; k<kmax; k++) {
1830         tmp = jj[nzcount++]/bs;
1831 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1832       }
1833       rowcount++;
1834     }
1835     /* sort the masked values */
1836     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1837 
1838     /* set "j" values into matrix */
1839     maskcount = 1;
1840     for (j=0; j<nmask; j++) {
1841       a->j[jcount++]  = masked[j];
1842       mask[masked[j]] = maskcount++;
1843     }
1844     /* set "a" values into matrix */
1845     ishift = bs2*a->i[i];
1846     for (j=0; j<bs; j++) {
1847       kmax = rowlengths[i*bs+j];
1848       for (k=0; k<kmax; k++) {
1849         tmp       = jj[nzcountb]/bs ;
1850         block     = mask[tmp] - 1;
1851         point     = jj[nzcountb] - bs*tmp;
1852         idx       = ishift + bs2*block + j + bs*point;
1853         a->a[idx] = (MatScalar)aa[nzcountb++];
1854       }
1855     }
1856     /* zero out the mask elements we set */
1857     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1858   }
1859   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix");
1860 
1861   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1862   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1863   ierr = PetscFree(aa);CHKERRQ(ierr);
1864   ierr = PetscFree(jj);CHKERRQ(ierr);
1865   ierr = PetscFree(mask);CHKERRQ(ierr);
1866 
1867   B->assembled = PETSC_TRUE;
1868 
1869   ierr = MatView_Private(B);CHKERRQ(ierr);
1870   PetscFunctionReturn(0);
1871 }
1872 
1873 
1874 
1875