xref: /petsc/src/mat/impls/baij/seq/baij.c (revision fdbc4c26573ee0b11b0935e81be87e6c27e56b8e)
1 /*$Id: baij.c,v 1.240 2001/07/16 20:10:28 buschelm Exp buschelm $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "petscsys.h"                     /*I "petscmat.h" I*/
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 
12 /*  UGLY, ugly, ugly
13    When MatScalar == Scalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
14    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
15    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
16    converts the entries into single precision and then calls ..._MatScalar() to put them
17    into the single precision data structures.
18 */
19 #if defined(PETSC_USE_MAT_SINGLE)
20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
21 #else
22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
23 #endif
24 
25 #define CHUNKSIZE  10
26 
27 /*
28      Checks for missing diagonals
29 */
30 #undef __FUNCT__
31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
32 int MatMissingDiagonal_SeqBAIJ(Mat A)
33 {
34   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
35   int         *diag,*jj = a->j,i,ierr;
36 
37   PetscFunctionBegin;
38   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
39   diag = a->diag;
40   for (i=0; i<a->mbs; i++) {
41     if (jj[diag[i]] != i) {
42       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
43     }
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
50 int MatMarkDiagonal_SeqBAIJ(Mat A)
51 {
52   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
53   int         i,j,*diag,m = a->mbs,ierr;
54 
55   PetscFunctionBegin;
56   if (a->diag) PetscFunctionReturn(0);
57 
58   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
59   PetscLogObjectMemory(A,(m+1)*sizeof(int));
60   for (i=0; i<m; i++) {
61     diag[i] = a->i[i+1];
62     for (j=a->i[i]; j<a->i[i+1]; j++) {
63       if (a->j[j] == i) {
64         diag[i] = j;
65         break;
66       }
67     }
68   }
69   a->diag = diag;
70   PetscFunctionReturn(0);
71 }
72 
73 
74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
79 {
80   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
81   int         ierr,n = a->mbs,i;
82 
83   PetscFunctionBegin;
84   *nn = n;
85   if (!ia) PetscFunctionReturn(0);
86   if (symmetric) {
87     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
88   } else if (oshift == 1) {
89     /* temporarily add 1 to i and j indices */
90     int nz = a->i[n];
91     for (i=0; i<nz; i++) a->j[i]++;
92     for (i=0; i<n+1; i++) a->i[i]++;
93     *ia = a->i; *ja = a->j;
94   } else {
95     *ia = a->i; *ja = a->j;
96   }
97 
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
104 {
105   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
106   int         i,n = a->mbs,ierr;
107 
108   PetscFunctionBegin;
109   if (!ia) PetscFunctionReturn(0);
110   if (symmetric) {
111     ierr = PetscFree(*ia);CHKERRQ(ierr);
112     ierr = PetscFree(*ja);CHKERRQ(ierr);
113   } else if (oshift == 1) {
114     int nz = a->i[n]-1;
115     for (i=0; i<nz; i++) a->j[i]--;
116     for (i=0; i<n+1; i++) a->i[i]--;
117   }
118   PetscFunctionReturn(0);
119 }
120 
121 #undef __FUNCT__
122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
124 {
125   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
126 
127   PetscFunctionBegin;
128   *bs = baij->bs;
129   PetscFunctionReturn(0);
130 }
131 
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatDestroy_SeqBAIJ"
135 int MatDestroy_SeqBAIJ(Mat A)
136 {
137   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
138   int         ierr;
139 
140   PetscFunctionBegin;
141 #if defined(PETSC_USE_LOG)
142   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
143 #endif
144   ierr = PetscFree(a->a);CHKERRQ(ierr);
145   if (!a->singlemalloc) {
146     ierr = PetscFree(a->i);CHKERRQ(ierr);
147     ierr = PetscFree(a->j);CHKERRQ(ierr);
148   }
149   if (a->row) {
150     ierr = ISDestroy(a->row);CHKERRQ(ierr);
151   }
152   if (a->col) {
153     ierr = ISDestroy(a->col);CHKERRQ(ierr);
154   }
155   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
156   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
157   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
158   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
159   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
160   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
161   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
162 #if defined(PETSC_USE_MAT_SINGLE)
163   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
164 #endif
165   ierr = PetscFree(a);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatSetOption_SeqBAIJ"
171 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
172 {
173   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
174 
175   PetscFunctionBegin;
176   switch (op) {
177   case MAT_ROW_ORIENTED:
178     a->roworiented    = PETSC_TRUE;
179     break;
180   case MAT_COLUMN_ORIENTED:
181     a->roworiented    = PETSC_FALSE;
182     break;
183   case MAT_COLUMNS_SORTED:
184     a->sorted         = PETSC_TRUE;
185     break;
186   case MAT_COLUMNS_UNSORTED:
187     a->sorted         = PETSC_FALSE;
188     break;
189   case MAT_KEEP_ZEROED_ROWS:
190     a->keepzeroedrows = PETSC_TRUE;
191     break;
192   case MAT_NO_NEW_NONZERO_LOCATIONS:
193     a->nonew          = 1;
194     break;
195   case MAT_NEW_NONZERO_LOCATION_ERR:
196     a->nonew          = -1;
197     break;
198   case MAT_NEW_NONZERO_ALLOCATION_ERR:
199     a->nonew          = -2;
200     break;
201   case MAT_YES_NEW_NONZERO_LOCATIONS:
202     a->nonew          = 0;
203     break;
204   case MAT_ROWS_SORTED:
205   case MAT_ROWS_UNSORTED:
206   case MAT_YES_NEW_DIAGONALS:
207   case MAT_IGNORE_OFF_PROC_ENTRIES:
208   case MAT_USE_HASH_TABLE:
209     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
210     break;
211   case MAT_NO_NEW_DIAGONALS:
212     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
213     break;
214   case MAT_USE_SINGLE_PRECISION_SOLVES:
215     if (a->bs==4) {
216       PetscTruth sse_enabled=PETSC_FALSE;
217       int        ierr;
218       ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr);
219 #if defined(PETSC_HAVE_SSE)
220       if (sse_enabled) {
221           a->single_precision_solves = PETSC_TRUE;
222           A->ops->solve              = MatSolve_SeqBAIJ_Update;
223           A->ops->solvetranspose     = MatSolveTranspose_SeqBAIJ_Update;
224           PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n");
225           break;
226       }
227 #else
228       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
229 #endif
230     }
231     break;
232   default:
233     SETERRQ(PETSC_ERR_SUP,"unknown option");
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ"
240 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
241 {
242   PetscFunctionBegin;
243   if (m) *m = 0;
244   if (n) *n = A->m;
245   PetscFunctionReturn(0);
246 }
247 
248 #undef __FUNCT__
249 #define __FUNCT__ "MatGetRow_SeqBAIJ"
250 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
251 {
252   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
253   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
254   MatScalar    *aa,*aa_i;
255   Scalar       *v_i;
256 
257   PetscFunctionBegin;
258   bs  = a->bs;
259   ai  = a->i;
260   aj  = a->j;
261   aa  = a->a;
262   bs2 = a->bs2;
263 
264   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
265 
266   bn  = row/bs;   /* Block number */
267   bp  = row % bs; /* Block Position */
268   M   = ai[bn+1] - ai[bn];
269   *nz = bs*M;
270 
271   if (v) {
272     *v = 0;
273     if (*nz) {
274       ierr = PetscMalloc((*nz)*sizeof(Scalar),v);CHKERRQ(ierr);
275       for (i=0; i<M; i++) { /* for each block in the block row */
276         v_i  = *v + i*bs;
277         aa_i = aa + bs2*(ai[bn] + i);
278         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
279       }
280     }
281   }
282 
283   if (idx) {
284     *idx = 0;
285     if (*nz) {
286       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
287       for (i=0; i<M; i++) { /* for each block in the block row */
288         idx_i = *idx + i*bs;
289         itmp  = bs*aj[ai[bn] + i];
290         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
291       }
292     }
293   }
294   PetscFunctionReturn(0);
295 }
296 
297 #undef __FUNCT__
298 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
299 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
300 {
301   int ierr;
302 
303   PetscFunctionBegin;
304   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
305   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatTranspose_SeqBAIJ"
311 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
312 {
313   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
314   Mat         C;
315   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
316   int         *rows,*cols,bs2=a->bs2;
317   Scalar      *array;
318 
319   PetscFunctionBegin;
320   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
321   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
322   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
323 
324 #if defined(PETSC_USE_MAT_SINGLE)
325   ierr = PetscMalloc(a->bs2*a->nz*sizeof(Scalar),&array);CHKERRQ(ierr);
326   for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i];
327 #else
328   array = a->a;
329 #endif
330 
331   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
332   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
333   ierr = PetscFree(col);CHKERRQ(ierr);
334   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
335   cols = rows + bs;
336   for (i=0; i<mbs; i++) {
337     cols[0] = i*bs;
338     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
339     len = ai[i+1] - ai[i];
340     for (j=0; j<len; j++) {
341       rows[0] = (*aj++)*bs;
342       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
343       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
344       array += bs2;
345     }
346   }
347   ierr = PetscFree(rows);CHKERRQ(ierr);
348 #if defined(PETSC_USE_MAT_SINGLE)
349   ierr = PetscFree(array);
350 #endif
351 
352   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
353   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
354 
355   if (B) {
356     *B = C;
357   } else {
358     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
359   }
360   PetscFunctionReturn(0);
361 }
362 
363 #undef __FUNCT__
364 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
365 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
366 {
367   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
368   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
369   Scalar      *aa;
370   FILE        *file;
371 
372   PetscFunctionBegin;
373   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
374   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
375   col_lens[0] = MAT_COOKIE;
376 
377   col_lens[1] = A->m;
378   col_lens[2] = A->n;
379   col_lens[3] = a->nz*bs2;
380 
381   /* store lengths of each row and write (including header) to file */
382   count = 0;
383   for (i=0; i<a->mbs; i++) {
384     for (j=0; j<bs; j++) {
385       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
386     }
387   }
388   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
389   ierr = PetscFree(col_lens);CHKERRQ(ierr);
390 
391   /* store column indices (zero start index) */
392   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
393   count = 0;
394   for (i=0; i<a->mbs; i++) {
395     for (j=0; j<bs; j++) {
396       for (k=a->i[i]; k<a->i[i+1]; k++) {
397         for (l=0; l<bs; l++) {
398           jj[count++] = bs*a->j[k] + l;
399         }
400       }
401     }
402   }
403   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
404   ierr = PetscFree(jj);CHKERRQ(ierr);
405 
406   /* store nonzero values */
407   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr);
408   count = 0;
409   for (i=0; i<a->mbs; i++) {
410     for (j=0; j<bs; j++) {
411       for (k=a->i[i]; k<a->i[i+1]; k++) {
412         for (l=0; l<bs; l++) {
413           aa[count++] = a->a[bs2*k + l*bs + j];
414         }
415       }
416     }
417   }
418   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
419   ierr = PetscFree(aa);CHKERRQ(ierr);
420 
421   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
422   if (file) {
423     fprintf(file,"-matload_block_size %d\n",a->bs);
424   }
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
430 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
431 {
432   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
433   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
434   PetscViewerFormat format;
435 
436   PetscFunctionBegin;
437   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
438   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
439     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
440   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
441     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
442   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
443     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
444     for (i=0; i<a->mbs; i++) {
445       for (j=0; j<bs; j++) {
446         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
447         for (k=a->i[i]; k<a->i[i+1]; k++) {
448           for (l=0; l<bs; l++) {
449 #if defined(PETSC_USE_COMPLEX)
450             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
451               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l,
452                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
453             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
454               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l,
455                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
456             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
457               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
458             }
459 #else
460             if (a->a[bs2*k + l*bs + j] != 0.0) {
461               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
462             }
463 #endif
464           }
465         }
466         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
467       }
468     }
469     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
470   } else {
471     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
472     for (i=0; i<a->mbs; i++) {
473       for (j=0; j<bs; j++) {
474         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
475         for (k=a->i[i]; k<a->i[i+1]; k++) {
476           for (l=0; l<bs; l++) {
477 #if defined(PETSC_USE_COMPLEX)
478             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
479               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
480                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
481             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
482               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
483                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
484             } else {
485               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
486             }
487 #else
488             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
489 #endif
490           }
491         }
492         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
493       }
494     }
495     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
496   }
497   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
498   PetscFunctionReturn(0);
499 }
500 
501 #undef __FUNCT__
502 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
503 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
504 {
505   Mat          A = (Mat) Aa;
506   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
507   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
508   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
509   MatScalar    *aa;
510   PetscViewer  viewer;
511 
512   PetscFunctionBegin;
513 
514   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
515   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
516 
517   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
518 
519   /* loop over matrix elements drawing boxes */
520   color = PETSC_DRAW_BLUE;
521   for (i=0,row=0; i<mbs; i++,row+=bs) {
522     for (j=a->i[i]; j<a->i[i+1]; j++) {
523       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
524       x_l = a->j[j]*bs; x_r = x_l + 1.0;
525       aa = a->a + j*bs2;
526       for (k=0; k<bs; k++) {
527         for (l=0; l<bs; l++) {
528           if (PetscRealPart(*aa++) >=  0.) continue;
529           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
530         }
531       }
532     }
533   }
534   color = PETSC_DRAW_CYAN;
535   for (i=0,row=0; i<mbs; i++,row+=bs) {
536     for (j=a->i[i]; j<a->i[i+1]; j++) {
537       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
538       x_l = a->j[j]*bs; x_r = x_l + 1.0;
539       aa = a->a + j*bs2;
540       for (k=0; k<bs; k++) {
541         for (l=0; l<bs; l++) {
542           if (PetscRealPart(*aa++) != 0.) continue;
543           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
544         }
545       }
546     }
547   }
548 
549   color = PETSC_DRAW_RED;
550   for (i=0,row=0; i<mbs; i++,row+=bs) {
551     for (j=a->i[i]; j<a->i[i+1]; j++) {
552       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
553       x_l = a->j[j]*bs; x_r = x_l + 1.0;
554       aa = a->a + j*bs2;
555       for (k=0; k<bs; k++) {
556         for (l=0; l<bs; l++) {
557           if (PetscRealPart(*aa++) <= 0.) continue;
558           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
559         }
560       }
561     }
562   }
563   PetscFunctionReturn(0);
564 }
565 
566 #undef __FUNCT__
567 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
568 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
569 {
570   int          ierr;
571   PetscReal    xl,yl,xr,yr,w,h;
572   PetscDraw    draw;
573   PetscTruth   isnull;
574 
575   PetscFunctionBegin;
576 
577   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
578   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
579 
580   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
581   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
582   xr += w;    yr += h;  xl = -w;     yl = -h;
583   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
584   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
585   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatView_SeqBAIJ"
591 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
592 {
593   int        ierr;
594   PetscTruth issocket,isascii,isbinary,isdraw;
595 
596   PetscFunctionBegin;
597   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
598   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
599   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
600   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
601   if (issocket) {
602     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
603   } else if (isascii){
604     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
605   } else if (isbinary) {
606     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
607   } else if (isdraw) {
608     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
609   } else {
610     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
611   }
612   PetscFunctionReturn(0);
613 }
614 
615 
616 #undef __FUNCT__
617 #define __FUNCT__ "MatGetValues_SeqBAIJ"
618 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
619 {
620   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
621   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
622   int        *ai = a->i,*ailen = a->ilen;
623   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
624   MatScalar  *ap,*aa = a->a,zero = 0.0;
625 
626   PetscFunctionBegin;
627   for (k=0; k<m; k++) { /* loop over rows */
628     row  = im[k]; brow = row/bs;
629     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
630     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
631     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
632     nrow = ailen[brow];
633     for (l=0; l<n; l++) { /* loop over columns */
634       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
635       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
636       col  = in[l] ;
637       bcol = col/bs;
638       cidx = col%bs;
639       ridx = row%bs;
640       high = nrow;
641       low  = 0; /* assume unsorted */
642       while (high-low > 5) {
643         t = (low+high)/2;
644         if (rp[t] > bcol) high = t;
645         else             low  = t;
646       }
647       for (i=low; i<high; i++) {
648         if (rp[i] > bcol) break;
649         if (rp[i] == bcol) {
650           *v++ = ap[bs2*i+bs*cidx+ridx];
651           goto finished;
652         }
653       }
654       *v++ = zero;
655       finished:;
656     }
657   }
658   PetscFunctionReturn(0);
659 }
660 
661 #if defined(PETSC_USE_MAT_SINGLE)
662 #undef __FUNCT__
663 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
664 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
665 {
666   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
667   int         ierr,i,N = m*n*b->bs2;
668   MatScalar   *vsingle;
669 
670   PetscFunctionBegin;
671   if (N > b->setvalueslen) {
672     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
673     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
674     b->setvalueslen  = N;
675   }
676   vsingle = b->setvaluescopy;
677   for (i=0; i<N; i++) {
678     vsingle[i] = v[i];
679   }
680   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
681   PetscFunctionReturn(0);
682 }
683 #endif
684 
685 
686 #undef __FUNCT__
687 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
688 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
689 {
690   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
691   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
692   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
693   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
694   PetscTruth  roworiented=a->roworiented;
695   MatScalar   *value = v,*ap,*aa = a->a,*bap;
696 
697   PetscFunctionBegin;
698   if (roworiented) {
699     stepval = (n-1)*bs;
700   } else {
701     stepval = (m-1)*bs;
702   }
703   for (k=0; k<m; k++) { /* loop over added rows */
704     row  = im[k];
705     if (row < 0) continue;
706 #if defined(PETSC_USE_BOPT_g)
707     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
708 #endif
709     rp   = aj + ai[row];
710     ap   = aa + bs2*ai[row];
711     rmax = imax[row];
712     nrow = ailen[row];
713     low  = 0;
714     for (l=0; l<n; l++) { /* loop over added columns */
715       if (in[l] < 0) continue;
716 #if defined(PETSC_USE_BOPT_g)
717       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
718 #endif
719       col = in[l];
720       if (roworiented) {
721         value = v + k*(stepval+bs)*bs + l*bs;
722       } else {
723         value = v + l*(stepval+bs)*bs + k*bs;
724       }
725       if (!sorted) low = 0; high = nrow;
726       while (high-low > 7) {
727         t = (low+high)/2;
728         if (rp[t] > col) high = t;
729         else             low  = t;
730       }
731       for (i=low; i<high; i++) {
732         if (rp[i] > col) break;
733         if (rp[i] == col) {
734           bap  = ap +  bs2*i;
735           if (roworiented) {
736             if (is == ADD_VALUES) {
737               for (ii=0; ii<bs; ii++,value+=stepval) {
738                 for (jj=ii; jj<bs2; jj+=bs) {
739                   bap[jj] += *value++;
740                 }
741               }
742             } else {
743               for (ii=0; ii<bs; ii++,value+=stepval) {
744                 for (jj=ii; jj<bs2; jj+=bs) {
745                   bap[jj] = *value++;
746                 }
747               }
748             }
749           } else {
750             if (is == ADD_VALUES) {
751               for (ii=0; ii<bs; ii++,value+=stepval) {
752                 for (jj=0; jj<bs; jj++) {
753                   *bap++ += *value++;
754                 }
755               }
756             } else {
757               for (ii=0; ii<bs; ii++,value+=stepval) {
758                 for (jj=0; jj<bs; jj++) {
759                   *bap++  = *value++;
760                 }
761               }
762             }
763           }
764           goto noinsert2;
765         }
766       }
767       if (nonew == 1) goto noinsert2;
768       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
769       if (nrow >= rmax) {
770         /* there is no extra room in row, therefore enlarge */
771         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
772         MatScalar *new_a;
773 
774         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
775 
776         /* malloc new storage space */
777         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
778 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
779         new_j   = (int*)(new_a + bs2*new_nz);
780         new_i   = new_j + new_nz;
781 
782         /* copy over old data into new slots */
783         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
784         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
785         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
786         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
787         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
788         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
789         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
790         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
791         /* free up old matrix storage */
792         ierr = PetscFree(a->a);CHKERRQ(ierr);
793         if (!a->singlemalloc) {
794           ierr = PetscFree(a->i);CHKERRQ(ierr);
795           ierr = PetscFree(a->j);CHKERRQ(ierr);
796         }
797         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
798         a->singlemalloc = PETSC_TRUE;
799 
800         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
801         rmax = imax[row] = imax[row] + CHUNKSIZE;
802         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
803         a->maxnz += bs2*CHUNKSIZE;
804         a->reallocs++;
805         a->nz++;
806       }
807       N = nrow++ - 1;
808       /* shift up all the later entries in this row */
809       for (ii=N; ii>=i; ii--) {
810         rp[ii+1] = rp[ii];
811         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
812       }
813       if (N >= i) {
814         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
815       }
816       rp[i] = col;
817       bap   = ap +  bs2*i;
818       if (roworiented) {
819         for (ii=0; ii<bs; ii++,value+=stepval) {
820           for (jj=ii; jj<bs2; jj+=bs) {
821             bap[jj] = *value++;
822           }
823         }
824       } else {
825         for (ii=0; ii<bs; ii++,value+=stepval) {
826           for (jj=0; jj<bs; jj++) {
827             *bap++  = *value++;
828           }
829         }
830       }
831       noinsert2:;
832       low = i;
833     }
834     ailen[row] = nrow;
835   }
836   PetscFunctionReturn(0);
837 }
838 
839 
840 #undef __FUNCT__
841 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
842 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
843 {
844   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
845   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
846   int        m = A->m,*ip,N,*ailen = a->ilen;
847   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
848   MatScalar  *aa = a->a,*ap;
849 
850   PetscFunctionBegin;
851   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
852 
853   if (m) rmax = ailen[0];
854   for (i=1; i<mbs; i++) {
855     /* move each row back by the amount of empty slots (fshift) before it*/
856     fshift += imax[i-1] - ailen[i-1];
857     rmax   = PetscMax(rmax,ailen[i]);
858     if (fshift) {
859       ip = aj + ai[i]; ap = aa + bs2*ai[i];
860       N = ailen[i];
861       for (j=0; j<N; j++) {
862         ip[j-fshift] = ip[j];
863         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
864       }
865     }
866     ai[i] = ai[i-1] + ailen[i-1];
867   }
868   if (mbs) {
869     fshift += imax[mbs-1] - ailen[mbs-1];
870     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
871   }
872   /* reset ilen and imax for each row */
873   for (i=0; i<mbs; i++) {
874     ailen[i] = imax[i] = ai[i+1] - ai[i];
875   }
876   a->nz = ai[mbs];
877 
878   /* diagonals may have moved, so kill the diagonal pointers */
879   if (fshift && a->diag) {
880     ierr = PetscFree(a->diag);CHKERRQ(ierr);
881     PetscLogObjectMemory(A,-(m+1)*sizeof(int));
882     a->diag = 0;
883   }
884   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
885            m,A->n,a->bs,fshift*bs2,a->nz*bs2);
886   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
887            a->reallocs);
888   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
889   a->reallocs          = 0;
890   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
891 
892   PetscFunctionReturn(0);
893 }
894 
895 
896 
897 /*
898    This function returns an array of flags which indicate the locations of contiguous
899    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
900    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
901    Assume: sizes should be long enough to hold all the values.
902 */
903 #undef __FUNCT__
904 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
905 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
906 {
907   int        i,j,k,row;
908   PetscTruth flg;
909 
910   PetscFunctionBegin;
911   for (i=0,j=0; i<n; j++) {
912     row = idx[i];
913     if (row%bs!=0) { /* Not the begining of a block */
914       sizes[j] = 1;
915       i++;
916     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
917       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
918       i++;
919     } else { /* Begining of the block, so check if the complete block exists */
920       flg = PETSC_TRUE;
921       for (k=1; k<bs; k++) {
922         if (row+k != idx[i+k]) { /* break in the block */
923           flg = PETSC_FALSE;
924           break;
925         }
926       }
927       if (flg == PETSC_TRUE) { /* No break in the bs */
928         sizes[j] = bs;
929         i+= bs;
930       } else {
931         sizes[j] = 1;
932         i++;
933       }
934     }
935   }
936   *bs_max = j;
937   PetscFunctionReturn(0);
938 }
939 
940 #undef __FUNCT__
941 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
942 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag)
943 {
944   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
945   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
946   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
947   Scalar      zero = 0.0;
948   MatScalar   *aa;
949 
950   PetscFunctionBegin;
951   /* Make a copy of the IS and  sort it */
952   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
953   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
954 
955   /* allocate memory for rows,sizes */
956   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
957   sizes = rows + is_n;
958 
959   /* copy IS values to rows, and sort them */
960   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
961   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
962   if (baij->keepzeroedrows) {
963     for (i=0; i<is_n; i++) { sizes[i] = 1; }
964     bs_max = is_n;
965   } else {
966     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
967   }
968   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
969 
970   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
971     row   = rows[j];
972     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
973     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
974     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
975     if (sizes[i] == bs && !baij->keepzeroedrows) {
976       if (diag) {
977         if (baij->ilen[row/bs] > 0) {
978           baij->ilen[row/bs]       = 1;
979           baij->j[baij->i[row/bs]] = row/bs;
980           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
981         }
982         /* Now insert all the diagonal values for this bs */
983         for (k=0; k<bs; k++) {
984           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
985         }
986       } else { /* (!diag) */
987         baij->ilen[row/bs] = 0;
988       } /* end (!diag) */
989     } else { /* (sizes[i] != bs) */
990 #if defined (PETSC_USE_DEBUG)
991       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
992 #endif
993       for (k=0; k<count; k++) {
994         aa[0] =  zero;
995         aa    += bs;
996       }
997       if (diag) {
998         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
999       }
1000     }
1001   }
1002 
1003   ierr = PetscFree(rows);CHKERRQ(ierr);
1004   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 #undef __FUNCT__
1009 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1010 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
1011 {
1012   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1013   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1014   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1015   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1016   int         ridx,cidx,bs2=a->bs2,ierr;
1017   PetscTruth  roworiented=a->roworiented;
1018   MatScalar   *ap,value,*aa=a->a,*bap;
1019 
1020   PetscFunctionBegin;
1021   for (k=0; k<m; k++) { /* loop over added rows */
1022     row  = im[k]; brow = row/bs;
1023     if (row < 0) continue;
1024 #if defined(PETSC_USE_BOPT_g)
1025     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1026 #endif
1027     rp   = aj + ai[brow];
1028     ap   = aa + bs2*ai[brow];
1029     rmax = imax[brow];
1030     nrow = ailen[brow];
1031     low  = 0;
1032     for (l=0; l<n; l++) { /* loop over added columns */
1033       if (in[l] < 0) continue;
1034 #if defined(PETSC_USE_BOPT_g)
1035       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1036 #endif
1037       col = in[l]; bcol = col/bs;
1038       ridx = row % bs; cidx = col % bs;
1039       if (roworiented) {
1040         value = v[l + k*n];
1041       } else {
1042         value = v[k + l*m];
1043       }
1044       if (!sorted) low = 0; high = nrow;
1045       while (high-low > 7) {
1046         t = (low+high)/2;
1047         if (rp[t] > bcol) high = t;
1048         else              low  = t;
1049       }
1050       for (i=low; i<high; i++) {
1051         if (rp[i] > bcol) break;
1052         if (rp[i] == bcol) {
1053           bap  = ap +  bs2*i + bs*cidx + ridx;
1054           if (is == ADD_VALUES) *bap += value;
1055           else                  *bap  = value;
1056           goto noinsert1;
1057         }
1058       }
1059       if (nonew == 1) goto noinsert1;
1060       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1061       if (nrow >= rmax) {
1062         /* there is no extra room in row, therefore enlarge */
1063         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1064         MatScalar *new_a;
1065 
1066         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1067 
1068         /* Malloc new storage space */
1069         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1070 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1071         new_j   = (int*)(new_a + bs2*new_nz);
1072         new_i   = new_j + new_nz;
1073 
1074         /* copy over old data into new slots */
1075         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1076         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1077         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1078         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1079         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1080         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1081         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1082         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1083         /* free up old matrix storage */
1084         ierr = PetscFree(a->a);CHKERRQ(ierr);
1085         if (!a->singlemalloc) {
1086           ierr = PetscFree(a->i);CHKERRQ(ierr);
1087           ierr = PetscFree(a->j);CHKERRQ(ierr);
1088         }
1089         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1090         a->singlemalloc = PETSC_TRUE;
1091 
1092         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1093         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1094         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1095         a->maxnz += bs2*CHUNKSIZE;
1096         a->reallocs++;
1097         a->nz++;
1098       }
1099       N = nrow++ - 1;
1100       /* shift up all the later entries in this row */
1101       for (ii=N; ii>=i; ii--) {
1102         rp[ii+1] = rp[ii];
1103         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1104       }
1105       if (N>=i) {
1106         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1107       }
1108       rp[i]                      = bcol;
1109       ap[bs2*i + bs*cidx + ridx] = value;
1110       noinsert1:;
1111       low = i;
1112     }
1113     ailen[brow] = nrow;
1114   }
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1121 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1122 {
1123   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1124   Mat         outA;
1125   int         ierr;
1126   PetscTruth  row_identity,col_identity;
1127 
1128   PetscFunctionBegin;
1129   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1130   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1131   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1132   if (!row_identity || !col_identity) {
1133     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1134   }
1135 
1136   outA          = inA;
1137   inA->factor   = FACTOR_LU;
1138 
1139   if (!a->diag) {
1140     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1141   }
1142 
1143   a->row        = row;
1144   a->col        = col;
1145   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1146   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1147 
1148   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1149   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1150   PetscLogObjectParent(inA,a->icol);
1151 
1152   /*
1153       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1154       for ILU(0) factorization with natural ordering
1155   */
1156   if (a->bs < 8) {
1157     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1158   } else {
1159     if (!a->solve_work) {
1160       ierr = PetscMalloc((inA->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr);
1161       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(Scalar));
1162     }
1163   }
1164 
1165   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1166 
1167   PetscFunctionReturn(0);
1168 }
1169 #undef __FUNCT__
1170 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1171 int MatPrintHelp_SeqBAIJ(Mat A)
1172 {
1173   static PetscTruth called = PETSC_FALSE;
1174   MPI_Comm          comm = A->comm;
1175   int               ierr;
1176 
1177   PetscFunctionBegin;
1178   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1179   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1180   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 EXTERN_C_BEGIN
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1187 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1188 {
1189   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1190   int         i,nz,n;
1191 
1192   PetscFunctionBegin;
1193   nz = baij->maxnz;
1194   n  = mat->n;
1195   for (i=0; i<nz; i++) {
1196     baij->j[i] = indices[i];
1197   }
1198   baij->nz = nz;
1199   for (i=0; i<n; i++) {
1200     baij->ilen[i] = baij->imax[i];
1201   }
1202 
1203   PetscFunctionReturn(0);
1204 }
1205 EXTERN_C_END
1206 
1207 #undef __FUNCT__
1208 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1209 /*@
1210     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1211        in the matrix.
1212 
1213   Input Parameters:
1214 +  mat - the SeqBAIJ matrix
1215 -  indices - the column indices
1216 
1217   Level: advanced
1218 
1219   Notes:
1220     This can be called if you have precomputed the nonzero structure of the
1221   matrix and want to provide it to the matrix object to improve the performance
1222   of the MatSetValues() operation.
1223 
1224     You MUST have set the correct numbers of nonzeros per row in the call to
1225   MatCreateSeqBAIJ().
1226 
1227     MUST be called before any calls to MatSetValues();
1228 
1229 @*/
1230 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1231 {
1232   int ierr,(*f)(Mat,int *);
1233 
1234   PetscFunctionBegin;
1235   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1236   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr);
1237   if (f) {
1238     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1239   } else {
1240     SETERRQ(1,"Wrong type of matrix to set column indices");
1241   }
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1247 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1248 {
1249   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1250   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1251   PetscReal    atmp;
1252   Scalar       *x,zero = 0.0;
1253   MatScalar    *aa;
1254   int          ncols,brow,krow,kcol;
1255 
1256   PetscFunctionBegin;
1257   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1258   bs   = a->bs;
1259   aa   = a->a;
1260   ai   = a->i;
1261   aj   = a->j;
1262   mbs = a->mbs;
1263 
1264   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1265   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1266   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1267   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1268   for (i=0; i<mbs; i++) {
1269     ncols = ai[1] - ai[0]; ai++;
1270     brow  = bs*i;
1271     for (j=0; j<ncols; j++){
1272       /* bcol = bs*(*aj); */
1273       for (kcol=0; kcol<bs; kcol++){
1274         for (krow=0; krow<bs; krow++){
1275           atmp = PetscAbsScalar(*aa); aa++;
1276           row = brow + krow;    /* row index */
1277           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1278           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1279         }
1280       }
1281       aj++;
1282     }
1283   }
1284   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1290 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1291 {
1292   int        ierr;
1293 
1294   PetscFunctionBegin;
1295   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1296   PetscFunctionReturn(0);
1297 }
1298 
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1301 int MatGetArray_SeqBAIJ(Mat A,Scalar **array)
1302 {
1303   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1304   PetscFunctionBegin;
1305   *array = a->a;
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1311 int MatRestoreArray_SeqBAIJ(Mat A,Scalar **array)
1312 {
1313   PetscFunctionBegin;
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 /* -------------------------------------------------------------------*/
1318 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1319        MatGetRow_SeqBAIJ,
1320        MatRestoreRow_SeqBAIJ,
1321        MatMult_SeqBAIJ_N,
1322        MatMultAdd_SeqBAIJ_N,
1323        MatMultTranspose_SeqBAIJ,
1324        MatMultTransposeAdd_SeqBAIJ,
1325        MatSolve_SeqBAIJ_N,
1326        0,
1327        0,
1328        0,
1329        MatLUFactor_SeqBAIJ,
1330        0,
1331        0,
1332        MatTranspose_SeqBAIJ,
1333        MatGetInfo_SeqBAIJ,
1334        MatEqual_SeqBAIJ,
1335        MatGetDiagonal_SeqBAIJ,
1336        MatDiagonalScale_SeqBAIJ,
1337        MatNorm_SeqBAIJ,
1338        0,
1339        MatAssemblyEnd_SeqBAIJ,
1340        0,
1341        MatSetOption_SeqBAIJ,
1342        MatZeroEntries_SeqBAIJ,
1343        MatZeroRows_SeqBAIJ,
1344        MatLUFactorSymbolic_SeqBAIJ,
1345        MatLUFactorNumeric_SeqBAIJ_N,
1346        0,
1347        0,
1348        MatSetUpPreallocation_SeqBAIJ,
1349        0,
1350        MatGetOwnershipRange_SeqBAIJ,
1351        MatILUFactorSymbolic_SeqBAIJ,
1352        0,
1353        MatGetArray_SeqBAIJ,
1354        MatRestoreArray_SeqBAIJ,
1355        MatDuplicate_SeqBAIJ,
1356        0,
1357        0,
1358        MatILUFactor_SeqBAIJ,
1359        0,
1360        0,
1361        MatGetSubMatrices_SeqBAIJ,
1362        MatIncreaseOverlap_SeqBAIJ,
1363        MatGetValues_SeqBAIJ,
1364        0,
1365        MatPrintHelp_SeqBAIJ,
1366        MatScale_SeqBAIJ,
1367        0,
1368        0,
1369        0,
1370        MatGetBlockSize_SeqBAIJ,
1371        MatGetRowIJ_SeqBAIJ,
1372        MatRestoreRowIJ_SeqBAIJ,
1373        0,
1374        0,
1375        0,
1376        0,
1377        0,
1378        0,
1379        MatSetValuesBlocked_SeqBAIJ,
1380        MatGetSubMatrix_SeqBAIJ,
1381        MatDestroy_SeqBAIJ,
1382        MatView_SeqBAIJ,
1383        MatGetMaps_Petsc,
1384        0,
1385        0,
1386        0,
1387        0,
1388        0,
1389        0,
1390        MatGetRowMax_SeqBAIJ,
1391        MatConvert_Basic};
1392 
1393 EXTERN_C_BEGIN
1394 #undef __FUNCT__
1395 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1396 int MatStoreValues_SeqBAIJ(Mat mat)
1397 {
1398   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1399   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1400   int         ierr;
1401 
1402   PetscFunctionBegin;
1403   if (aij->nonew != 1) {
1404     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1405   }
1406 
1407   /* allocate space for values if not already there */
1408   if (!aij->saved_values) {
1409     ierr = PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr);
1410   }
1411 
1412   /* copy values over */
1413   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1414   PetscFunctionReturn(0);
1415 }
1416 EXTERN_C_END
1417 
1418 EXTERN_C_BEGIN
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1421 int MatRetrieveValues_SeqBAIJ(Mat mat)
1422 {
1423   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1424   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1425 
1426   PetscFunctionBegin;
1427   if (aij->nonew != 1) {
1428     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1429   }
1430   if (!aij->saved_values) {
1431     SETERRQ(1,"Must call MatStoreValues(A);first");
1432   }
1433 
1434   /* copy values over */
1435   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1436   PetscFunctionReturn(0);
1437 }
1438 EXTERN_C_END
1439 
1440 EXTERN_C_BEGIN
1441 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1442 EXTERN_C_END
1443 
1444 EXTERN_C_BEGIN
1445 #undef __FUNCT__
1446 #define __FUNCT__ "MatCreate_SeqBAIJ"
1447 int MatCreate_SeqBAIJ(Mat B)
1448 {
1449   int         ierr,size;
1450   Mat_SeqBAIJ *b;
1451 
1452   PetscFunctionBegin;
1453   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1454   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1455 
1456   B->m = B->M = PetscMax(B->m,B->M);
1457   B->n = B->N = PetscMax(B->n,B->N);
1458   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1459   B->data = (void*)b;
1460   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1461   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1462   B->factor           = 0;
1463   B->lupivotthreshold = 1.0;
1464   B->mapping          = 0;
1465   b->row              = 0;
1466   b->col              = 0;
1467   b->icol             = 0;
1468   b->reallocs         = 0;
1469   b->saved_values     = 0;
1470 #if defined(PETSC_USE_MAT_SINGLE)
1471   b->setvalueslen     = 0;
1472   b->setvaluescopy    = PETSC_NULL;
1473 #endif
1474   b->single_precision_solves = PETSC_FALSE;
1475 
1476   ierr = MapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1477   ierr = MapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1478 
1479   b->sorted           = PETSC_FALSE;
1480   b->roworiented      = PETSC_TRUE;
1481   b->nonew            = 0;
1482   b->diag             = 0;
1483   b->solve_work       = 0;
1484   b->mult_work        = 0;
1485   b->spptr            = 0;
1486   B->info.nz_unneeded = (PetscReal)b->maxnz;
1487   b->keepzeroedrows   = PETSC_FALSE;
1488 
1489   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1490                                      "MatStoreValues_SeqBAIJ",
1491                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1492   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1493                                      "MatRetrieveValues_SeqBAIJ",
1494                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1495   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1496                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1497                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1498   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1499                                      "MatConvert_SeqBAIJ_SeqAIJ",
1500                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1501   PetscFunctionReturn(0);
1502 }
1503 EXTERN_C_END
1504 
1505 #undef __FUNCT__
1506 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1507 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1508 {
1509   Mat         C;
1510   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1511   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1512 
1513   PetscFunctionBegin;
1514   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1515 
1516   *B = 0;
1517   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1518   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1519   c    = (Mat_SeqBAIJ*)C->data;
1520 
1521   c->bs         = a->bs;
1522   c->bs2        = a->bs2;
1523   c->mbs        = a->mbs;
1524   c->nbs        = a->nbs;
1525   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1526 
1527   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1528   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1529   for (i=0; i<mbs; i++) {
1530     c->imax[i] = a->imax[i];
1531     c->ilen[i] = a->ilen[i];
1532   }
1533 
1534   /* allocate the matrix space */
1535   c->singlemalloc = PETSC_TRUE;
1536   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1537   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1538   c->j = (int*)(c->a + nz*bs2);
1539   c->i = c->j + nz;
1540   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1541   if (mbs > 0) {
1542     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1543     if (cpvalues == MAT_COPY_VALUES) {
1544       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1545     } else {
1546       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1547     }
1548   }
1549 
1550   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1551   c->sorted      = a->sorted;
1552   c->roworiented = a->roworiented;
1553   c->nonew       = a->nonew;
1554 
1555   if (a->diag) {
1556     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1557     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1558     for (i=0; i<mbs; i++) {
1559       c->diag[i] = a->diag[i];
1560     }
1561   } else c->diag        = 0;
1562   c->nz                 = a->nz;
1563   c->maxnz              = a->maxnz;
1564   c->solve_work         = 0;
1565   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1566   c->mult_work          = 0;
1567   C->preallocated       = PETSC_TRUE;
1568   C->assembled          = PETSC_TRUE;
1569   *B = C;
1570   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 EXTERN_C_BEGIN
1575 #undef __FUNCT__
1576 #define __FUNCT__ "MatLoad_SeqBAIJ"
1577 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1578 {
1579   Mat_SeqBAIJ  *a;
1580   Mat          B;
1581   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1582   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1583   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1584   int          *masked,nmask,tmp,bs2,ishift;
1585   Scalar       *aa;
1586   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1587 
1588   PetscFunctionBegin;
1589   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1590   bs2  = bs*bs;
1591 
1592   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1593   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1594   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1595   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1596   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1597   M = header[1]; N = header[2]; nz = header[3];
1598 
1599   if (header[3] < 0) {
1600     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1601   }
1602 
1603   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1604 
1605   /*
1606      This code adds extra rows to make sure the number of rows is
1607     divisible by the blocksize
1608   */
1609   mbs        = M/bs;
1610   extra_rows = bs - M + bs*(mbs);
1611   if (extra_rows == bs) extra_rows = 0;
1612   else                  mbs++;
1613   if (extra_rows) {
1614     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1615   }
1616 
1617   /* read in row lengths */
1618   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1619   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1620   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1621 
1622   /* read in column indices */
1623   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1624   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1625   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1626 
1627   /* loop over row lengths determining block row lengths */
1628   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1629   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1630   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1631   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1632   masked   = mask + mbs;
1633   rowcount = 0; nzcount = 0;
1634   for (i=0; i<mbs; i++) {
1635     nmask = 0;
1636     for (j=0; j<bs; j++) {
1637       kmax = rowlengths[rowcount];
1638       for (k=0; k<kmax; k++) {
1639         tmp = jj[nzcount++]/bs;
1640         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1641       }
1642       rowcount++;
1643     }
1644     browlengths[i] += nmask;
1645     /* zero out the mask elements we set */
1646     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1647   }
1648 
1649   /* create our matrix */
1650   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1651   B = *A;
1652   a = (Mat_SeqBAIJ*)B->data;
1653 
1654   /* set matrix "i" values */
1655   a->i[0] = 0;
1656   for (i=1; i<= mbs; i++) {
1657     a->i[i]      = a->i[i-1] + browlengths[i-1];
1658     a->ilen[i-1] = browlengths[i-1];
1659   }
1660   a->nz         = 0;
1661   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1662 
1663   /* read in nonzero values */
1664   ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr);
1665   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1666   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1667 
1668   /* set "a" and "j" values into matrix */
1669   nzcount = 0; jcount = 0;
1670   for (i=0; i<mbs; i++) {
1671     nzcountb = nzcount;
1672     nmask    = 0;
1673     for (j=0; j<bs; j++) {
1674       kmax = rowlengths[i*bs+j];
1675       for (k=0; k<kmax; k++) {
1676         tmp = jj[nzcount++]/bs;
1677 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1678       }
1679     }
1680     /* sort the masked values */
1681     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1682 
1683     /* set "j" values into matrix */
1684     maskcount = 1;
1685     for (j=0; j<nmask; j++) {
1686       a->j[jcount++]  = masked[j];
1687       mask[masked[j]] = maskcount++;
1688     }
1689     /* set "a" values into matrix */
1690     ishift = bs2*a->i[i];
1691     for (j=0; j<bs; j++) {
1692       kmax = rowlengths[i*bs+j];
1693       for (k=0; k<kmax; k++) {
1694         tmp       = jj[nzcountb]/bs ;
1695         block     = mask[tmp] - 1;
1696         point     = jj[nzcountb] - bs*tmp;
1697         idx       = ishift + bs2*block + j + bs*point;
1698         a->a[idx] = (MatScalar)aa[nzcountb++];
1699       }
1700     }
1701     /* zero out the mask elements we set */
1702     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1703   }
1704   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1705 
1706   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1707   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1708   ierr = PetscFree(aa);CHKERRQ(ierr);
1709   ierr = PetscFree(jj);CHKERRQ(ierr);
1710   ierr = PetscFree(mask);CHKERRQ(ierr);
1711 
1712   B->assembled = PETSC_TRUE;
1713 
1714   ierr = MatView_Private(B);CHKERRQ(ierr);
1715   PetscFunctionReturn(0);
1716 }
1717 EXTERN_C_END
1718 
1719 #undef __FUNCT__
1720 #define __FUNCT__ "MatCreateSeqBAIJ"
1721 /*@C
1722    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1723    compressed row) format.  For good matrix assembly performance the
1724    user should preallocate the matrix storage by setting the parameter nz
1725    (or the array nnz).  By setting these parameters accurately, performance
1726    during matrix assembly can be increased by more than a factor of 50.
1727 
1728    Collective on MPI_Comm
1729 
1730    Input Parameters:
1731 +  comm - MPI communicator, set to PETSC_COMM_SELF
1732 .  bs - size of block
1733 .  m - number of rows
1734 .  n - number of columns
1735 .  nz - number of nonzero blocks  per block row (same for all rows)
1736 -  nnz - array containing the number of nonzero blocks in the various block rows
1737          (possibly different for each block row) or PETSC_NULL
1738 
1739    Output Parameter:
1740 .  A - the matrix
1741 
1742    Options Database Keys:
1743 .   -mat_no_unroll - uses code that does not unroll the loops in the
1744                      block calculations (much slower)
1745 .    -mat_block_size - size of the blocks to use
1746 
1747    Level: intermediate
1748 
1749    Notes:
1750    A nonzero block is any block that as 1 or more nonzeros in it
1751 
1752    The block AIJ format is fully compatible with standard Fortran 77
1753    storage.  That is, the stored row and column indices can begin at
1754    either one (as in Fortran) or zero.  See the users' manual for details.
1755 
1756    Specify the preallocated storage with either nz or nnz (not both).
1757    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1758    allocation.  For additional details, see the users manual chapter on
1759    matrices.
1760 
1761 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1762 @*/
1763 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1764 {
1765   int ierr;
1766 
1767   PetscFunctionBegin;
1768   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1769   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
1770   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1771   PetscFunctionReturn(0);
1772 }
1773 
1774 #undef __FUNCT__
1775 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
1776 /*@C
1777    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1778    per row in the matrix. For good matrix assembly performance the
1779    user should preallocate the matrix storage by setting the parameter nz
1780    (or the array nnz).  By setting these parameters accurately, performance
1781    during matrix assembly can be increased by more than a factor of 50.
1782 
1783    Collective on MPI_Comm
1784 
1785    Input Parameters:
1786 +  A - the matrix
1787 .  bs - size of block
1788 .  nz - number of block nonzeros per block row (same for all rows)
1789 -  nnz - array containing the number of block nonzeros in the various block rows
1790          (possibly different for each block row) or PETSC_NULL
1791 
1792    Options Database Keys:
1793 .   -mat_no_unroll - uses code that does not unroll the loops in the
1794                      block calculations (much slower)
1795 .    -mat_block_size - size of the blocks to use
1796 
1797    Level: intermediate
1798 
1799    Notes:
1800    The block AIJ format is fully compatible with standard Fortran 77
1801    storage.  That is, the stored row and column indices can begin at
1802    either one (as in Fortran) or zero.  See the users' manual for details.
1803 
1804    Specify the preallocated storage with either nz or nnz (not both).
1805    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1806    allocation.  For additional details, see the users manual chapter on
1807    matrices.
1808 
1809 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1810 @*/
1811 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1812 {
1813   Mat_SeqBAIJ *b;
1814   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1815   PetscTruth  flg;
1816 
1817   PetscFunctionBegin;
1818   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr);
1819   if (!flg) PetscFunctionReturn(0);
1820 
1821   B->preallocated = PETSC_TRUE;
1822   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1823   if (nnz && newbs != bs) {
1824     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1825   }
1826   bs   = newbs;
1827 
1828   mbs  = B->m/bs;
1829   nbs  = B->n/bs;
1830   bs2  = bs*bs;
1831 
1832   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1833     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1834   }
1835 
1836   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1837   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1838   if (nnz) {
1839     for (i=0; i<mbs; i++) {
1840       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1841       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1842     }
1843   }
1844 
1845   b       = (Mat_SeqBAIJ*)B->data;
1846   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1847   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1848   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1849   if (!flg) {
1850     switch (bs) {
1851     case 1:
1852       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1853       B->ops->mult            = MatMult_SeqBAIJ_1;
1854       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1855       break;
1856     case 2:
1857       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1858       B->ops->mult            = MatMult_SeqBAIJ_2;
1859       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1860       break;
1861     case 3:
1862       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1863       B->ops->mult            = MatMult_SeqBAIJ_3;
1864       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1865       break;
1866     case 4:
1867       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1868       B->ops->mult            = MatMult_SeqBAIJ_4;
1869       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1870       break;
1871     case 5:
1872       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1873       B->ops->mult            = MatMult_SeqBAIJ_5;
1874       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1875       break;
1876     case 6:
1877       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1878       B->ops->mult            = MatMult_SeqBAIJ_6;
1879       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1880       break;
1881     case 7:
1882       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1883       B->ops->mult            = MatMult_SeqBAIJ_7;
1884       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1885       break;
1886     default:
1887       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1888       B->ops->mult            = MatMult_SeqBAIJ_N;
1889       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1890       break;
1891     }
1892   }
1893   b->bs      = bs;
1894   b->mbs     = mbs;
1895   b->nbs     = nbs;
1896   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1897   if (!nnz) {
1898     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1899     else if (nz <= 0)        nz = 1;
1900     for (i=0; i<mbs; i++) b->imax[i] = nz;
1901     nz = nz*mbs;
1902   } else {
1903     nz = 0;
1904     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1905   }
1906 
1907   /* allocate the matrix space */
1908   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1909   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1910   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1911   b->j  = (int*)(b->a + nz*bs2);
1912   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1913   b->i  = b->j + nz;
1914   b->singlemalloc = PETSC_TRUE;
1915 
1916   b->i[0] = 0;
1917   for (i=1; i<mbs+1; i++) {
1918     b->i[i] = b->i[i-1] + b->imax[i-1];
1919   }
1920 
1921   /* b->ilen will count nonzeros in each block row so far. */
1922   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1923   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1924   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1925 
1926   b->bs               = bs;
1927   b->bs2              = bs2;
1928   b->mbs              = mbs;
1929   b->nz               = 0;
1930   b->maxnz            = nz*bs2;
1931   B->info.nz_unneeded = (PetscReal)b->maxnz;
1932   PetscFunctionReturn(0);
1933 }
1934 
1935