xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 282663a71f2ad042b2f11dcbaffd71b730fde5dd)
1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "petscsys.h"                     /*I "petscmat.h" I*/
11 
12 /*  UGLY, ugly, ugly
13    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
14    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
15    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
16    converts the entries into single precision and then calls ..._MatScalar() to put them
17    into the single precision data structures.
18 */
19 #if defined(PETSC_USE_MAT_SINGLE)
20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
21 #else
22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
23 #endif
24 
25 #define CHUNKSIZE  10
26 
27 /*
28      Checks for missing diagonals
29 */
30 #undef __FUNCT__
31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
32 int MatMissingDiagonal_SeqBAIJ(Mat A)
33 {
34   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
35   int         *diag,*jj = a->j,i,ierr;
36 
37   PetscFunctionBegin;
38   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
39   diag = a->diag;
40   for (i=0; i<a->mbs; i++) {
41     if (jj[diag[i]] != i) {
42       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
43     }
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
50 int MatMarkDiagonal_SeqBAIJ(Mat A)
51 {
52   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
53   int         i,j,*diag,m = a->mbs,ierr;
54 
55   PetscFunctionBegin;
56   if (a->diag) PetscFunctionReturn(0);
57 
58   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
59   PetscLogObjectMemory(A,(m+1)*sizeof(int));
60   for (i=0; i<m; i++) {
61     diag[i] = a->i[i+1];
62     for (j=a->i[i]; j<a->i[i+1]; j++) {
63       if (a->j[j] == i) {
64         diag[i] = j;
65         break;
66       }
67     }
68   }
69   a->diag = diag;
70   PetscFunctionReturn(0);
71 }
72 
73 
74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
79 {
80   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
81   int         ierr,n = a->mbs,i;
82 
83   PetscFunctionBegin;
84   *nn = n;
85   if (!ia) PetscFunctionReturn(0);
86   if (symmetric) {
87     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
88   } else if (oshift == 1) {
89     /* temporarily add 1 to i and j indices */
90     int nz = a->i[n];
91     for (i=0; i<nz; i++) a->j[i]++;
92     for (i=0; i<n+1; i++) a->i[i]++;
93     *ia = a->i; *ja = a->j;
94   } else {
95     *ia = a->i; *ja = a->j;
96   }
97 
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
104 {
105   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
106   int         i,n = a->mbs,ierr;
107 
108   PetscFunctionBegin;
109   if (!ia) PetscFunctionReturn(0);
110   if (symmetric) {
111     ierr = PetscFree(*ia);CHKERRQ(ierr);
112     ierr = PetscFree(*ja);CHKERRQ(ierr);
113   } else if (oshift == 1) {
114     int nz = a->i[n]-1;
115     for (i=0; i<nz; i++) a->j[i]--;
116     for (i=0; i<n+1; i++) a->i[i]--;
117   }
118   PetscFunctionReturn(0);
119 }
120 
121 #undef __FUNCT__
122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
124 {
125   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
126 
127   PetscFunctionBegin;
128   *bs = baij->bs;
129   PetscFunctionReturn(0);
130 }
131 
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatDestroy_SeqBAIJ"
135 int MatDestroy_SeqBAIJ(Mat A)
136 {
137   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
138   int         ierr;
139 
140   PetscFunctionBegin;
141 #if defined(PETSC_USE_LOG)
142   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
143 #endif
144   ierr = PetscFree(a->a);CHKERRQ(ierr);
145   if (!a->singlemalloc) {
146     ierr = PetscFree(a->i);CHKERRQ(ierr);
147     ierr = PetscFree(a->j);CHKERRQ(ierr);
148   }
149   if (a->row) {
150     ierr = ISDestroy(a->row);CHKERRQ(ierr);
151   }
152   if (a->col) {
153     ierr = ISDestroy(a->col);CHKERRQ(ierr);
154   }
155   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
156   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
157   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
158   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
159   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
160   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
161   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
162 #if defined(PETSC_USE_MAT_SINGLE)
163   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
164 #endif
165   ierr = PetscFree(a);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatSetOption_SeqBAIJ"
171 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
172 {
173   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
174 
175   PetscFunctionBegin;
176   switch (op) {
177   case MAT_ROW_ORIENTED:
178     a->roworiented    = PETSC_TRUE;
179     break;
180   case MAT_COLUMN_ORIENTED:
181     a->roworiented    = PETSC_FALSE;
182     break;
183   case MAT_COLUMNS_SORTED:
184     a->sorted         = PETSC_TRUE;
185     break;
186   case MAT_COLUMNS_UNSORTED:
187     a->sorted         = PETSC_FALSE;
188     break;
189   case MAT_KEEP_ZEROED_ROWS:
190     a->keepzeroedrows = PETSC_TRUE;
191     break;
192   case MAT_NO_NEW_NONZERO_LOCATIONS:
193     a->nonew          = 1;
194     break;
195   case MAT_NEW_NONZERO_LOCATION_ERR:
196     a->nonew          = -1;
197     break;
198   case MAT_NEW_NONZERO_ALLOCATION_ERR:
199     a->nonew          = -2;
200     break;
201   case MAT_YES_NEW_NONZERO_LOCATIONS:
202     a->nonew          = 0;
203     break;
204   case MAT_ROWS_SORTED:
205   case MAT_ROWS_UNSORTED:
206   case MAT_YES_NEW_DIAGONALS:
207   case MAT_IGNORE_OFF_PROC_ENTRIES:
208   case MAT_USE_HASH_TABLE:
209     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
210     break;
211   case MAT_NO_NEW_DIAGONALS:
212     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
213   case MAT_USE_SINGLE_PRECISION_SOLVES:
214     if (a->bs==4) {
215       PetscTruth sse_enabled_local,sse_enabled_global;
216       int        ierr;
217 
218       sse_enabled_local  = PETSC_FALSE;
219       sse_enabled_global = PETSC_FALSE;
220 
221       ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr);
222 #if defined(PETSC_HAVE_SSE)
223       if (sse_enabled_local) {
224           a->single_precision_solves = PETSC_TRUE;
225           A->ops->solve              = MatSolve_SeqBAIJ_Update;
226           A->ops->solvetranspose     = MatSolveTranspose_SeqBAIJ_Update;
227           PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n");
228           break;
229       } else {
230         PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
231       }
232 #else
233       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
234 #endif
235     }
236     break;
237   default:
238     SETERRQ(PETSC_ERR_SUP,"unknown option");
239   }
240   PetscFunctionReturn(0);
241 }
242 
243 #undef __FUNCT__
244 #define __FUNCT__ "MatGetRow_SeqBAIJ"
245 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
246 {
247   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
248   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
249   MatScalar    *aa,*aa_i;
250   PetscScalar  *v_i;
251 
252   PetscFunctionBegin;
253   bs  = a->bs;
254   ai  = a->i;
255   aj  = a->j;
256   aa  = a->a;
257   bs2 = a->bs2;
258 
259   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
260 
261   bn  = row/bs;   /* Block number */
262   bp  = row % bs; /* Block Position */
263   M   = ai[bn+1] - ai[bn];
264   *nz = bs*M;
265 
266   if (v) {
267     *v = 0;
268     if (*nz) {
269       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
270       for (i=0; i<M; i++) { /* for each block in the block row */
271         v_i  = *v + i*bs;
272         aa_i = aa + bs2*(ai[bn] + i);
273         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
274       }
275     }
276   }
277 
278   if (idx) {
279     *idx = 0;
280     if (*nz) {
281       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
282       for (i=0; i<M; i++) { /* for each block in the block row */
283         idx_i = *idx + i*bs;
284         itmp  = bs*aj[ai[bn] + i];
285         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
286       }
287     }
288   }
289   PetscFunctionReturn(0);
290 }
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
294 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
295 {
296   int ierr;
297 
298   PetscFunctionBegin;
299   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
300   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
301   PetscFunctionReturn(0);
302 }
303 
304 #undef __FUNCT__
305 #define __FUNCT__ "MatTranspose_SeqBAIJ"
306 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
307 {
308   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
309   Mat         C;
310   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
311   int         *rows,*cols,bs2=a->bs2;
312   PetscScalar *array;
313 
314   PetscFunctionBegin;
315   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
316   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
317   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
318 
319 #if defined(PETSC_USE_MAT_SINGLE)
320   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
321   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
322 #else
323   array = a->a;
324 #endif
325 
326   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
327   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
328   ierr = PetscFree(col);CHKERRQ(ierr);
329   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
330   cols = rows + bs;
331   for (i=0; i<mbs; i++) {
332     cols[0] = i*bs;
333     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
334     len = ai[i+1] - ai[i];
335     for (j=0; j<len; j++) {
336       rows[0] = (*aj++)*bs;
337       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
338       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
339       array += bs2;
340     }
341   }
342   ierr = PetscFree(rows);CHKERRQ(ierr);
343 #if defined(PETSC_USE_MAT_SINGLE)
344   ierr = PetscFree(array);
345 #endif
346 
347   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
349 
350   if (B) {
351     *B = C;
352   } else {
353     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
354   }
355   PetscFunctionReturn(0);
356 }
357 
358 #undef __FUNCT__
359 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
360 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
361 {
362   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
363   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
364   PetscScalar *aa;
365   FILE        *file;
366 
367   PetscFunctionBegin;
368   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
369   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
370   col_lens[0] = MAT_FILE_COOKIE;
371 
372   col_lens[1] = A->m;
373   col_lens[2] = A->n;
374   col_lens[3] = a->nz*bs2;
375 
376   /* store lengths of each row and write (including header) to file */
377   count = 0;
378   for (i=0; i<a->mbs; i++) {
379     for (j=0; j<bs; j++) {
380       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
381     }
382   }
383   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
384   ierr = PetscFree(col_lens);CHKERRQ(ierr);
385 
386   /* store column indices (zero start index) */
387   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
388   count = 0;
389   for (i=0; i<a->mbs; i++) {
390     for (j=0; j<bs; j++) {
391       for (k=a->i[i]; k<a->i[i+1]; k++) {
392         for (l=0; l<bs; l++) {
393           jj[count++] = bs*a->j[k] + l;
394         }
395       }
396     }
397   }
398   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
399   ierr = PetscFree(jj);CHKERRQ(ierr);
400 
401   /* store nonzero values */
402   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
403   count = 0;
404   for (i=0; i<a->mbs; i++) {
405     for (j=0; j<bs; j++) {
406       for (k=a->i[i]; k<a->i[i+1]; k++) {
407         for (l=0; l<bs; l++) {
408           aa[count++] = a->a[bs2*k + l*bs + j];
409         }
410       }
411     }
412   }
413   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
414   ierr = PetscFree(aa);CHKERRQ(ierr);
415 
416   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
417   if (file) {
418     fprintf(file,"-matload_block_size %d\n",a->bs);
419   }
420   PetscFunctionReturn(0);
421 }
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
425 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
426 {
427   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
428   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
429   PetscViewerFormat format;
430 
431   PetscFunctionBegin;
432   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
433   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
434     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
435   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
436     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
437   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
438     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
439     for (i=0; i<a->mbs; i++) {
440       for (j=0; j<bs; j++) {
441         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
442         for (k=a->i[i]; k<a->i[i+1]; k++) {
443           for (l=0; l<bs; l++) {
444 #if defined(PETSC_USE_COMPLEX)
445             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
446               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l,
447                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
448             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
449               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l,
450                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
451             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
452               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
453             }
454 #else
455             if (a->a[bs2*k + l*bs + j] != 0.0) {
456               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
457             }
458 #endif
459           }
460         }
461         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
462       }
463     }
464     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
465   } else {
466     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
467     for (i=0; i<a->mbs; i++) {
468       for (j=0; j<bs; j++) {
469         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
470         for (k=a->i[i]; k<a->i[i+1]; k++) {
471           for (l=0; l<bs; l++) {
472 #if defined(PETSC_USE_COMPLEX)
473             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
474               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
475                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
476             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
477               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
478                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
479             } else {
480               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
481             }
482 #else
483             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
484 #endif
485           }
486         }
487         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
488       }
489     }
490     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
491   }
492   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
498 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
499 {
500   Mat          A = (Mat) Aa;
501   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
502   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
503   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
504   MatScalar    *aa;
505   PetscViewer  viewer;
506 
507   PetscFunctionBegin;
508 
509   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
510   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
511 
512   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
513 
514   /* loop over matrix elements drawing boxes */
515   color = PETSC_DRAW_BLUE;
516   for (i=0,row=0; i<mbs; i++,row+=bs) {
517     for (j=a->i[i]; j<a->i[i+1]; j++) {
518       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
519       x_l = a->j[j]*bs; x_r = x_l + 1.0;
520       aa = a->a + j*bs2;
521       for (k=0; k<bs; k++) {
522         for (l=0; l<bs; l++) {
523           if (PetscRealPart(*aa++) >=  0.) continue;
524           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
525         }
526       }
527     }
528   }
529   color = PETSC_DRAW_CYAN;
530   for (i=0,row=0; i<mbs; i++,row+=bs) {
531     for (j=a->i[i]; j<a->i[i+1]; j++) {
532       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
533       x_l = a->j[j]*bs; x_r = x_l + 1.0;
534       aa = a->a + j*bs2;
535       for (k=0; k<bs; k++) {
536         for (l=0; l<bs; l++) {
537           if (PetscRealPart(*aa++) != 0.) continue;
538           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
539         }
540       }
541     }
542   }
543 
544   color = PETSC_DRAW_RED;
545   for (i=0,row=0; i<mbs; i++,row+=bs) {
546     for (j=a->i[i]; j<a->i[i+1]; j++) {
547       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
548       x_l = a->j[j]*bs; x_r = x_l + 1.0;
549       aa = a->a + j*bs2;
550       for (k=0; k<bs; k++) {
551         for (l=0; l<bs; l++) {
552           if (PetscRealPart(*aa++) <= 0.) continue;
553           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
554         }
555       }
556     }
557   }
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
563 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
564 {
565   int          ierr;
566   PetscReal    xl,yl,xr,yr,w,h;
567   PetscDraw    draw;
568   PetscTruth   isnull;
569 
570   PetscFunctionBegin;
571 
572   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
573   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
574 
575   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
576   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
577   xr += w;    yr += h;  xl = -w;     yl = -h;
578   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
579   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
580   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
581   PetscFunctionReturn(0);
582 }
583 
584 #undef __FUNCT__
585 #define __FUNCT__ "MatView_SeqBAIJ"
586 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
587 {
588   int        ierr;
589   PetscTruth issocket,isascii,isbinary,isdraw;
590 
591   PetscFunctionBegin;
592   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
593   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
594   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
595   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
596   if (issocket) {
597     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
598   } else if (isascii){
599     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
600   } else if (isbinary) {
601     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
602   } else if (isdraw) {
603     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
604   } else {
605     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
606   }
607   PetscFunctionReturn(0);
608 }
609 
610 
611 #undef __FUNCT__
612 #define __FUNCT__ "MatGetValues_SeqBAIJ"
613 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
614 {
615   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
616   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
617   int        *ai = a->i,*ailen = a->ilen;
618   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
619   MatScalar  *ap,*aa = a->a,zero = 0.0;
620 
621   PetscFunctionBegin;
622   for (k=0; k<m; k++) { /* loop over rows */
623     row  = im[k]; brow = row/bs;
624     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
625     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
626     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
627     nrow = ailen[brow];
628     for (l=0; l<n; l++) { /* loop over columns */
629       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
630       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
631       col  = in[l] ;
632       bcol = col/bs;
633       cidx = col%bs;
634       ridx = row%bs;
635       high = nrow;
636       low  = 0; /* assume unsorted */
637       while (high-low > 5) {
638         t = (low+high)/2;
639         if (rp[t] > bcol) high = t;
640         else             low  = t;
641       }
642       for (i=low; i<high; i++) {
643         if (rp[i] > bcol) break;
644         if (rp[i] == bcol) {
645           *v++ = ap[bs2*i+bs*cidx+ridx];
646           goto finished;
647         }
648       }
649       *v++ = zero;
650       finished:;
651     }
652   }
653   PetscFunctionReturn(0);
654 }
655 
656 #if defined(PETSC_USE_MAT_SINGLE)
657 #undef __FUNCT__
658 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
659 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
660 {
661   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
662   int         ierr,i,N = m*n*b->bs2;
663   MatScalar   *vsingle;
664 
665   PetscFunctionBegin;
666   if (N > b->setvalueslen) {
667     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
668     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
669     b->setvalueslen  = N;
670   }
671   vsingle = b->setvaluescopy;
672   for (i=0; i<N; i++) {
673     vsingle[i] = v[i];
674   }
675   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
676   PetscFunctionReturn(0);
677 }
678 #endif
679 
680 
681 #undef __FUNCT__
682 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
683 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
684 {
685   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
686   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
687   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
688   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
689   PetscTruth  roworiented=a->roworiented;
690   MatScalar   *value = v,*ap,*aa = a->a,*bap;
691 
692   PetscFunctionBegin;
693   if (roworiented) {
694     stepval = (n-1)*bs;
695   } else {
696     stepval = (m-1)*bs;
697   }
698   for (k=0; k<m; k++) { /* loop over added rows */
699     row  = im[k];
700     if (row < 0) continue;
701 #if defined(PETSC_USE_BOPT_g)
702     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
703 #endif
704     rp   = aj + ai[row];
705     ap   = aa + bs2*ai[row];
706     rmax = imax[row];
707     nrow = ailen[row];
708     low  = 0;
709     for (l=0; l<n; l++) { /* loop over added columns */
710       if (in[l] < 0) continue;
711 #if defined(PETSC_USE_BOPT_g)
712       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
713 #endif
714       col = in[l];
715       if (roworiented) {
716         value = v + k*(stepval+bs)*bs + l*bs;
717       } else {
718         value = v + l*(stepval+bs)*bs + k*bs;
719       }
720       if (!sorted) low = 0; high = nrow;
721       while (high-low > 7) {
722         t = (low+high)/2;
723         if (rp[t] > col) high = t;
724         else             low  = t;
725       }
726       for (i=low; i<high; i++) {
727         if (rp[i] > col) break;
728         if (rp[i] == col) {
729           bap  = ap +  bs2*i;
730           if (roworiented) {
731             if (is == ADD_VALUES) {
732               for (ii=0; ii<bs; ii++,value+=stepval) {
733                 for (jj=ii; jj<bs2; jj+=bs) {
734                   bap[jj] += *value++;
735                 }
736               }
737             } else {
738               for (ii=0; ii<bs; ii++,value+=stepval) {
739                 for (jj=ii; jj<bs2; jj+=bs) {
740                   bap[jj] = *value++;
741                 }
742               }
743             }
744           } else {
745             if (is == ADD_VALUES) {
746               for (ii=0; ii<bs; ii++,value+=stepval) {
747                 for (jj=0; jj<bs; jj++) {
748                   *bap++ += *value++;
749                 }
750               }
751             } else {
752               for (ii=0; ii<bs; ii++,value+=stepval) {
753                 for (jj=0; jj<bs; jj++) {
754                   *bap++  = *value++;
755                 }
756               }
757             }
758           }
759           goto noinsert2;
760         }
761       }
762       if (nonew == 1) goto noinsert2;
763       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
764       if (nrow >= rmax) {
765         /* there is no extra room in row, therefore enlarge */
766         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
767         MatScalar *new_a;
768 
769         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
770 
771         /* malloc new storage space */
772         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
773 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
774         new_j   = (int*)(new_a + bs2*new_nz);
775         new_i   = new_j + new_nz;
776 
777         /* copy over old data into new slots */
778         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
779         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
780         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
781         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
782         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
783         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
784         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
785         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
786         /* free up old matrix storage */
787         ierr = PetscFree(a->a);CHKERRQ(ierr);
788         if (!a->singlemalloc) {
789           ierr = PetscFree(a->i);CHKERRQ(ierr);
790           ierr = PetscFree(a->j);CHKERRQ(ierr);
791         }
792         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
793         a->singlemalloc = PETSC_TRUE;
794 
795         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
796         rmax = imax[row] = imax[row] + CHUNKSIZE;
797         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
798         a->maxnz += bs2*CHUNKSIZE;
799         a->reallocs++;
800         a->nz++;
801       }
802       N = nrow++ - 1;
803       /* shift up all the later entries in this row */
804       for (ii=N; ii>=i; ii--) {
805         rp[ii+1] = rp[ii];
806         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
807       }
808       if (N >= i) {
809         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
810       }
811       rp[i] = col;
812       bap   = ap +  bs2*i;
813       if (roworiented) {
814         for (ii=0; ii<bs; ii++,value+=stepval) {
815           for (jj=ii; jj<bs2; jj+=bs) {
816             bap[jj] = *value++;
817           }
818         }
819       } else {
820         for (ii=0; ii<bs; ii++,value+=stepval) {
821           for (jj=0; jj<bs; jj++) {
822             *bap++  = *value++;
823           }
824         }
825       }
826       noinsert2:;
827       low = i;
828     }
829     ailen[row] = nrow;
830   }
831   PetscFunctionReturn(0);
832 }
833 
834 
835 #undef __FUNCT__
836 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
837 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
838 {
839   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
840   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
841   int        m = A->m,*ip,N,*ailen = a->ilen;
842   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
843   MatScalar  *aa = a->a,*ap;
844 
845   PetscFunctionBegin;
846   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
847 
848   if (m) rmax = ailen[0];
849   for (i=1; i<mbs; i++) {
850     /* move each row back by the amount of empty slots (fshift) before it*/
851     fshift += imax[i-1] - ailen[i-1];
852     rmax   = PetscMax(rmax,ailen[i]);
853     if (fshift) {
854       ip = aj + ai[i]; ap = aa + bs2*ai[i];
855       N = ailen[i];
856       for (j=0; j<N; j++) {
857         ip[j-fshift] = ip[j];
858         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
859       }
860     }
861     ai[i] = ai[i-1] + ailen[i-1];
862   }
863   if (mbs) {
864     fshift += imax[mbs-1] - ailen[mbs-1];
865     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
866   }
867   /* reset ilen and imax for each row */
868   for (i=0; i<mbs; i++) {
869     ailen[i] = imax[i] = ai[i+1] - ai[i];
870   }
871   a->nz = ai[mbs];
872 
873   /* diagonals may have moved, so kill the diagonal pointers */
874   if (fshift && a->diag) {
875     ierr = PetscFree(a->diag);CHKERRQ(ierr);
876     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
877     a->diag = 0;
878   }
879   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
880   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
881   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
882   a->reallocs          = 0;
883   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
884 
885   PetscFunctionReturn(0);
886 }
887 
888 
889 
890 /*
891    This function returns an array of flags which indicate the locations of contiguous
892    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
893    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
894    Assume: sizes should be long enough to hold all the values.
895 */
896 #undef __FUNCT__
897 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
898 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
899 {
900   int        i,j,k,row;
901   PetscTruth flg;
902 
903   PetscFunctionBegin;
904   for (i=0,j=0; i<n; j++) {
905     row = idx[i];
906     if (row%bs!=0) { /* Not the begining of a block */
907       sizes[j] = 1;
908       i++;
909     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
910       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
911       i++;
912     } else { /* Begining of the block, so check if the complete block exists */
913       flg = PETSC_TRUE;
914       for (k=1; k<bs; k++) {
915         if (row+k != idx[i+k]) { /* break in the block */
916           flg = PETSC_FALSE;
917           break;
918         }
919       }
920       if (flg == PETSC_TRUE) { /* No break in the bs */
921         sizes[j] = bs;
922         i+= bs;
923       } else {
924         sizes[j] = 1;
925         i++;
926       }
927     }
928   }
929   *bs_max = j;
930   PetscFunctionReturn(0);
931 }
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
935 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag)
936 {
937   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
938   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
939   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
940   PetscScalar zero = 0.0;
941   MatScalar   *aa;
942 
943   PetscFunctionBegin;
944   /* Make a copy of the IS and  sort it */
945   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
946   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
947 
948   /* allocate memory for rows,sizes */
949   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
950   sizes = rows + is_n;
951 
952   /* copy IS values to rows, and sort them */
953   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
954   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
955   if (baij->keepzeroedrows) {
956     for (i=0; i<is_n; i++) { sizes[i] = 1; }
957     bs_max = is_n;
958   } else {
959     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
960   }
961   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
962 
963   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
964     row   = rows[j];
965     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
966     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
967     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
968     if (sizes[i] == bs && !baij->keepzeroedrows) {
969       if (diag) {
970         if (baij->ilen[row/bs] > 0) {
971           baij->ilen[row/bs]       = 1;
972           baij->j[baij->i[row/bs]] = row/bs;
973           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
974         }
975         /* Now insert all the diagonal values for this bs */
976         for (k=0; k<bs; k++) {
977           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
978         }
979       } else { /* (!diag) */
980         baij->ilen[row/bs] = 0;
981       } /* end (!diag) */
982     } else { /* (sizes[i] != bs) */
983 #if defined (PETSC_USE_DEBUG)
984       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
985 #endif
986       for (k=0; k<count; k++) {
987         aa[0] =  zero;
988         aa    += bs;
989       }
990       if (diag) {
991         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
992       }
993     }
994   }
995 
996   ierr = PetscFree(rows);CHKERRQ(ierr);
997   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
998   PetscFunctionReturn(0);
999 }
1000 
1001 #undef __FUNCT__
1002 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1003 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
1004 {
1005   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1006   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1007   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1008   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1009   int         ridx,cidx,bs2=a->bs2,ierr;
1010   PetscTruth  roworiented=a->roworiented;
1011   MatScalar   *ap,value,*aa=a->a,*bap;
1012 
1013   PetscFunctionBegin;
1014   for (k=0; k<m; k++) { /* loop over added rows */
1015     row  = im[k]; brow = row/bs;
1016     if (row < 0) continue;
1017 #if defined(PETSC_USE_BOPT_g)
1018     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1019 #endif
1020     rp   = aj + ai[brow];
1021     ap   = aa + bs2*ai[brow];
1022     rmax = imax[brow];
1023     nrow = ailen[brow];
1024     low  = 0;
1025     for (l=0; l<n; l++) { /* loop over added columns */
1026       if (in[l] < 0) continue;
1027 #if defined(PETSC_USE_BOPT_g)
1028       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1029 #endif
1030       col = in[l]; bcol = col/bs;
1031       ridx = row % bs; cidx = col % bs;
1032       if (roworiented) {
1033         value = v[l + k*n];
1034       } else {
1035         value = v[k + l*m];
1036       }
1037       if (!sorted) low = 0; high = nrow;
1038       while (high-low > 7) {
1039         t = (low+high)/2;
1040         if (rp[t] > bcol) high = t;
1041         else              low  = t;
1042       }
1043       for (i=low; i<high; i++) {
1044         if (rp[i] > bcol) break;
1045         if (rp[i] == bcol) {
1046           bap  = ap +  bs2*i + bs*cidx + ridx;
1047           if (is == ADD_VALUES) *bap += value;
1048           else                  *bap  = value;
1049           goto noinsert1;
1050         }
1051       }
1052       if (nonew == 1) goto noinsert1;
1053       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1054       if (nrow >= rmax) {
1055         /* there is no extra room in row, therefore enlarge */
1056         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1057         MatScalar *new_a;
1058 
1059         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1060 
1061         /* Malloc new storage space */
1062         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1063 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1064         new_j   = (int*)(new_a + bs2*new_nz);
1065         new_i   = new_j + new_nz;
1066 
1067         /* copy over old data into new slots */
1068         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1069         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1070         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1071         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1072         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1073         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1074         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1075         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1076         /* free up old matrix storage */
1077         ierr = PetscFree(a->a);CHKERRQ(ierr);
1078         if (!a->singlemalloc) {
1079           ierr = PetscFree(a->i);CHKERRQ(ierr);
1080           ierr = PetscFree(a->j);CHKERRQ(ierr);
1081         }
1082         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1083         a->singlemalloc = PETSC_TRUE;
1084 
1085         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1086         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1087         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1088         a->maxnz += bs2*CHUNKSIZE;
1089         a->reallocs++;
1090         a->nz++;
1091       }
1092       N = nrow++ - 1;
1093       /* shift up all the later entries in this row */
1094       for (ii=N; ii>=i; ii--) {
1095         rp[ii+1] = rp[ii];
1096         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1097       }
1098       if (N>=i) {
1099         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1100       }
1101       rp[i]                      = bcol;
1102       ap[bs2*i + bs*cidx + ridx] = value;
1103       noinsert1:;
1104       low = i;
1105     }
1106     ailen[brow] = nrow;
1107   }
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 
1112 #undef __FUNCT__
1113 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1114 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1115 {
1116   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1117   Mat         outA;
1118   int         ierr;
1119   PetscTruth  row_identity,col_identity;
1120 
1121   PetscFunctionBegin;
1122   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1123   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1124   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1125   if (!row_identity || !col_identity) {
1126     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1127   }
1128 
1129   outA          = inA;
1130   inA->factor   = FACTOR_LU;
1131 
1132   if (!a->diag) {
1133     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1134   }
1135 
1136   a->row        = row;
1137   a->col        = col;
1138   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1139   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1140 
1141   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1142   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1143   PetscLogObjectParent(inA,a->icol);
1144 
1145   /*
1146       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1147       for ILU(0) factorization with natural ordering
1148   */
1149   if (a->bs < 8) {
1150     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1151   } else {
1152     if (!a->solve_work) {
1153       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1154       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1155     }
1156   }
1157 
1158   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1159 
1160   PetscFunctionReturn(0);
1161 }
1162 #undef __FUNCT__
1163 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1164 int MatPrintHelp_SeqBAIJ(Mat A)
1165 {
1166   static PetscTruth called = PETSC_FALSE;
1167   MPI_Comm          comm = A->comm;
1168   int               ierr;
1169 
1170   PetscFunctionBegin;
1171   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1172   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1173   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 EXTERN_C_BEGIN
1178 #undef __FUNCT__
1179 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1180 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1181 {
1182   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1183   int         i,nz,nbs;
1184 
1185   PetscFunctionBegin;
1186   nz  = baij->maxnz/baij->bs2;
1187   nbs = baij->nbs;
1188   for (i=0; i<nz; i++) {
1189     baij->j[i] = indices[i];
1190   }
1191   baij->nz = nz;
1192   for (i=0; i<nbs; i++) {
1193     baij->ilen[i] = baij->imax[i];
1194   }
1195 
1196   PetscFunctionReturn(0);
1197 }
1198 EXTERN_C_END
1199 
1200 #undef __FUNCT__
1201 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1202 /*@
1203     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1204        in the matrix.
1205 
1206   Input Parameters:
1207 +  mat - the SeqBAIJ matrix
1208 -  indices - the column indices
1209 
1210   Level: advanced
1211 
1212   Notes:
1213     This can be called if you have precomputed the nonzero structure of the
1214   matrix and want to provide it to the matrix object to improve the performance
1215   of the MatSetValues() operation.
1216 
1217     You MUST have set the correct numbers of nonzeros per row in the call to
1218   MatCreateSeqBAIJ().
1219 
1220     MUST be called before any calls to MatSetValues();
1221 
1222 @*/
1223 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1224 {
1225   int ierr,(*f)(Mat,int *);
1226 
1227   PetscFunctionBegin;
1228   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1229   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1230   if (f) {
1231     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1232   } else {
1233     SETERRQ(1,"Wrong type of matrix to set column indices");
1234   }
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 #undef __FUNCT__
1239 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1240 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1241 {
1242   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1243   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1244   PetscReal    atmp;
1245   PetscScalar  *x,zero = 0.0;
1246   MatScalar    *aa;
1247   int          ncols,brow,krow,kcol;
1248 
1249   PetscFunctionBegin;
1250   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1251   bs   = a->bs;
1252   aa   = a->a;
1253   ai   = a->i;
1254   aj   = a->j;
1255   mbs = a->mbs;
1256 
1257   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1258   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1259   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1260   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1261   for (i=0; i<mbs; i++) {
1262     ncols = ai[1] - ai[0]; ai++;
1263     brow  = bs*i;
1264     for (j=0; j<ncols; j++){
1265       /* bcol = bs*(*aj); */
1266       for (kcol=0; kcol<bs; kcol++){
1267         for (krow=0; krow<bs; krow++){
1268           atmp = PetscAbsScalar(*aa); aa++;
1269           row = brow + krow;    /* row index */
1270           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1271           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1272         }
1273       }
1274       aj++;
1275     }
1276   }
1277   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1283 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1284 {
1285   int        ierr;
1286 
1287   PetscFunctionBegin;
1288   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1289   PetscFunctionReturn(0);
1290 }
1291 
1292 #undef __FUNCT__
1293 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1294 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array)
1295 {
1296   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1297   PetscFunctionBegin;
1298   *array = a->a;
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 #undef __FUNCT__
1303 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1304 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array)
1305 {
1306   PetscFunctionBegin;
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 /* -------------------------------------------------------------------*/
1311 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1312        MatGetRow_SeqBAIJ,
1313        MatRestoreRow_SeqBAIJ,
1314        MatMult_SeqBAIJ_N,
1315        MatMultAdd_SeqBAIJ_N,
1316        MatMultTranspose_SeqBAIJ,
1317        MatMultTransposeAdd_SeqBAIJ,
1318        MatSolve_SeqBAIJ_N,
1319        0,
1320        0,
1321        0,
1322        MatLUFactor_SeqBAIJ,
1323        0,
1324        0,
1325        MatTranspose_SeqBAIJ,
1326        MatGetInfo_SeqBAIJ,
1327        MatEqual_SeqBAIJ,
1328        MatGetDiagonal_SeqBAIJ,
1329        MatDiagonalScale_SeqBAIJ,
1330        MatNorm_SeqBAIJ,
1331        0,
1332        MatAssemblyEnd_SeqBAIJ,
1333        0,
1334        MatSetOption_SeqBAIJ,
1335        MatZeroEntries_SeqBAIJ,
1336        MatZeroRows_SeqBAIJ,
1337        MatLUFactorSymbolic_SeqBAIJ,
1338        MatLUFactorNumeric_SeqBAIJ_N,
1339        0,
1340        0,
1341        MatSetUpPreallocation_SeqBAIJ,
1342        MatILUFactorSymbolic_SeqBAIJ,
1343        0,
1344        MatGetArray_SeqBAIJ,
1345        MatRestoreArray_SeqBAIJ,
1346        MatDuplicate_SeqBAIJ,
1347        0,
1348        0,
1349        MatILUFactor_SeqBAIJ,
1350        0,
1351        0,
1352        MatGetSubMatrices_SeqBAIJ,
1353        MatIncreaseOverlap_SeqBAIJ,
1354        MatGetValues_SeqBAIJ,
1355        0,
1356        MatPrintHelp_SeqBAIJ,
1357        MatScale_SeqBAIJ,
1358        0,
1359        0,
1360        0,
1361        MatGetBlockSize_SeqBAIJ,
1362        MatGetRowIJ_SeqBAIJ,
1363        MatRestoreRowIJ_SeqBAIJ,
1364        0,
1365        0,
1366        0,
1367        0,
1368        0,
1369        0,
1370        MatSetValuesBlocked_SeqBAIJ,
1371        MatGetSubMatrix_SeqBAIJ,
1372        MatDestroy_SeqBAIJ,
1373        MatView_SeqBAIJ,
1374        MatGetPetscMaps_Petsc,
1375        0,
1376        0,
1377        0,
1378        0,
1379        0,
1380        0,
1381        MatGetRowMax_SeqBAIJ,
1382        MatConvert_Basic};
1383 
1384 EXTERN_C_BEGIN
1385 #undef __FUNCT__
1386 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1387 int MatStoreValues_SeqBAIJ(Mat mat)
1388 {
1389   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1390   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1391   int         ierr;
1392 
1393   PetscFunctionBegin;
1394   if (aij->nonew != 1) {
1395     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1396   }
1397 
1398   /* allocate space for values if not already there */
1399   if (!aij->saved_values) {
1400     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1401   }
1402 
1403   /* copy values over */
1404   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
1407 EXTERN_C_END
1408 
1409 EXTERN_C_BEGIN
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1412 int MatRetrieveValues_SeqBAIJ(Mat mat)
1413 {
1414   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1415   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1416 
1417   PetscFunctionBegin;
1418   if (aij->nonew != 1) {
1419     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1420   }
1421   if (!aij->saved_values) {
1422     SETERRQ(1,"Must call MatStoreValues(A);first");
1423   }
1424 
1425   /* copy values over */
1426   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1427   PetscFunctionReturn(0);
1428 }
1429 EXTERN_C_END
1430 
1431 EXTERN_C_BEGIN
1432 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1433 EXTERN_C_END
1434 
1435 EXTERN_C_BEGIN
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatCreate_SeqBAIJ"
1438 int MatCreate_SeqBAIJ(Mat B)
1439 {
1440   int         ierr,size;
1441   Mat_SeqBAIJ *b;
1442 
1443   PetscFunctionBegin;
1444   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1445   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1446 
1447   B->m = B->M = PetscMax(B->m,B->M);
1448   B->n = B->N = PetscMax(B->n,B->N);
1449   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1450   B->data = (void*)b;
1451   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1452   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1453   B->factor           = 0;
1454   B->lupivotthreshold = 1.0;
1455   B->mapping          = 0;
1456   b->row              = 0;
1457   b->col              = 0;
1458   b->icol             = 0;
1459   b->reallocs         = 0;
1460   b->saved_values     = 0;
1461 #if defined(PETSC_USE_MAT_SINGLE)
1462   b->setvalueslen     = 0;
1463   b->setvaluescopy    = PETSC_NULL;
1464 #endif
1465   b->single_precision_solves = PETSC_FALSE;
1466 
1467   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1468   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1469 
1470   b->sorted           = PETSC_FALSE;
1471   b->roworiented      = PETSC_TRUE;
1472   b->nonew            = 0;
1473   b->diag             = 0;
1474   b->solve_work       = 0;
1475   b->mult_work        = 0;
1476   b->spptr            = 0;
1477   B->info.nz_unneeded = (PetscReal)b->maxnz;
1478   b->keepzeroedrows   = PETSC_FALSE;
1479 
1480   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1481                                      "MatStoreValues_SeqBAIJ",
1482                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1483   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1484                                      "MatRetrieveValues_SeqBAIJ",
1485                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1486   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1487                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1488                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1489   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1490                                      "MatConvert_SeqBAIJ_SeqAIJ",
1491                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1492   PetscFunctionReturn(0);
1493 }
1494 EXTERN_C_END
1495 
1496 #undef __FUNCT__
1497 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1498 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1499 {
1500   Mat         C;
1501   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1502   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1503 
1504   PetscFunctionBegin;
1505   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1506 
1507   *B = 0;
1508   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1509   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1510   c    = (Mat_SeqBAIJ*)C->data;
1511 
1512   c->bs         = a->bs;
1513   c->bs2        = a->bs2;
1514   c->mbs        = a->mbs;
1515   c->nbs        = a->nbs;
1516   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1517 
1518   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1519   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1520   for (i=0; i<mbs; i++) {
1521     c->imax[i] = a->imax[i];
1522     c->ilen[i] = a->ilen[i];
1523   }
1524 
1525   /* allocate the matrix space */
1526   c->singlemalloc = PETSC_TRUE;
1527   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1528   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1529   c->j = (int*)(c->a + nz*bs2);
1530   c->i = c->j + nz;
1531   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1532   if (mbs > 0) {
1533     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1534     if (cpvalues == MAT_COPY_VALUES) {
1535       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1536     } else {
1537       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1538     }
1539   }
1540 
1541   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1542   c->sorted      = a->sorted;
1543   c->roworiented = a->roworiented;
1544   c->nonew       = a->nonew;
1545 
1546   if (a->diag) {
1547     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1548     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1549     for (i=0; i<mbs; i++) {
1550       c->diag[i] = a->diag[i];
1551     }
1552   } else c->diag        = 0;
1553   c->nz                 = a->nz;
1554   c->maxnz              = a->maxnz;
1555   c->solve_work         = 0;
1556   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1557   c->mult_work          = 0;
1558   C->preallocated       = PETSC_TRUE;
1559   C->assembled          = PETSC_TRUE;
1560   *B = C;
1561   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 EXTERN_C_BEGIN
1566 #undef __FUNCT__
1567 #define __FUNCT__ "MatLoad_SeqBAIJ"
1568 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1569 {
1570   Mat_SeqBAIJ  *a;
1571   Mat          B;
1572   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1573   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1574   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1575   int          *masked,nmask,tmp,bs2,ishift;
1576   PetscScalar  *aa;
1577   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1578 
1579   PetscFunctionBegin;
1580   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1581   bs2  = bs*bs;
1582 
1583   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1584   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1585   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1586   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1587   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1588   M = header[1]; N = header[2]; nz = header[3];
1589 
1590   if (header[3] < 0) {
1591     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1592   }
1593 
1594   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1595 
1596   /*
1597      This code adds extra rows to make sure the number of rows is
1598     divisible by the blocksize
1599   */
1600   mbs        = M/bs;
1601   extra_rows = bs - M + bs*(mbs);
1602   if (extra_rows == bs) extra_rows = 0;
1603   else                  mbs++;
1604   if (extra_rows) {
1605     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1606   }
1607 
1608   /* read in row lengths */
1609   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1610   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1611   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1612 
1613   /* read in column indices */
1614   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1615   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1616   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1617 
1618   /* loop over row lengths determining block row lengths */
1619   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1620   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1621   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1622   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1623   masked   = mask + mbs;
1624   rowcount = 0; nzcount = 0;
1625   for (i=0; i<mbs; i++) {
1626     nmask = 0;
1627     for (j=0; j<bs; j++) {
1628       kmax = rowlengths[rowcount];
1629       for (k=0; k<kmax; k++) {
1630         tmp = jj[nzcount++]/bs;
1631         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1632       }
1633       rowcount++;
1634     }
1635     browlengths[i] += nmask;
1636     /* zero out the mask elements we set */
1637     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1638   }
1639 
1640   /* create our matrix */
1641   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1642   B = *A;
1643   a = (Mat_SeqBAIJ*)B->data;
1644 
1645   /* set matrix "i" values */
1646   a->i[0] = 0;
1647   for (i=1; i<= mbs; i++) {
1648     a->i[i]      = a->i[i-1] + browlengths[i-1];
1649     a->ilen[i-1] = browlengths[i-1];
1650   }
1651   a->nz         = 0;
1652   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1653 
1654   /* read in nonzero values */
1655   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1656   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1657   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1658 
1659   /* set "a" and "j" values into matrix */
1660   nzcount = 0; jcount = 0;
1661   for (i=0; i<mbs; i++) {
1662     nzcountb = nzcount;
1663     nmask    = 0;
1664     for (j=0; j<bs; j++) {
1665       kmax = rowlengths[i*bs+j];
1666       for (k=0; k<kmax; k++) {
1667         tmp = jj[nzcount++]/bs;
1668 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1669       }
1670     }
1671     /* sort the masked values */
1672     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1673 
1674     /* set "j" values into matrix */
1675     maskcount = 1;
1676     for (j=0; j<nmask; j++) {
1677       a->j[jcount++]  = masked[j];
1678       mask[masked[j]] = maskcount++;
1679     }
1680     /* set "a" values into matrix */
1681     ishift = bs2*a->i[i];
1682     for (j=0; j<bs; j++) {
1683       kmax = rowlengths[i*bs+j];
1684       for (k=0; k<kmax; k++) {
1685         tmp       = jj[nzcountb]/bs ;
1686         block     = mask[tmp] - 1;
1687         point     = jj[nzcountb] - bs*tmp;
1688         idx       = ishift + bs2*block + j + bs*point;
1689         a->a[idx] = (MatScalar)aa[nzcountb++];
1690       }
1691     }
1692     /* zero out the mask elements we set */
1693     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1694   }
1695   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1696 
1697   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1698   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1699   ierr = PetscFree(aa);CHKERRQ(ierr);
1700   ierr = PetscFree(jj);CHKERRQ(ierr);
1701   ierr = PetscFree(mask);CHKERRQ(ierr);
1702 
1703   B->assembled = PETSC_TRUE;
1704 
1705   ierr = MatView_Private(B);CHKERRQ(ierr);
1706   PetscFunctionReturn(0);
1707 }
1708 EXTERN_C_END
1709 
1710 #undef __FUNCT__
1711 #define __FUNCT__ "MatCreateSeqBAIJ"
1712 /*@C
1713    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1714    compressed row) format.  For good matrix assembly performance the
1715    user should preallocate the matrix storage by setting the parameter nz
1716    (or the array nnz).  By setting these parameters accurately, performance
1717    during matrix assembly can be increased by more than a factor of 50.
1718 
1719    Collective on MPI_Comm
1720 
1721    Input Parameters:
1722 +  comm - MPI communicator, set to PETSC_COMM_SELF
1723 .  bs - size of block
1724 .  m - number of rows
1725 .  n - number of columns
1726 .  nz - number of nonzero blocks  per block row (same for all rows)
1727 -  nnz - array containing the number of nonzero blocks in the various block rows
1728          (possibly different for each block row) or PETSC_NULL
1729 
1730    Output Parameter:
1731 .  A - the matrix
1732 
1733    Options Database Keys:
1734 .   -mat_no_unroll - uses code that does not unroll the loops in the
1735                      block calculations (much slower)
1736 .    -mat_block_size - size of the blocks to use
1737 
1738    Level: intermediate
1739 
1740    Notes:
1741    A nonzero block is any block that as 1 or more nonzeros in it
1742 
1743    The block AIJ format is fully compatible with standard Fortran 77
1744    storage.  That is, the stored row and column indices can begin at
1745    either one (as in Fortran) or zero.  See the users' manual for details.
1746 
1747    Specify the preallocated storage with either nz or nnz (not both).
1748    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1749    allocation.  For additional details, see the users manual chapter on
1750    matrices.
1751 
1752 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1753 @*/
1754 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1755 {
1756   int ierr;
1757 
1758   PetscFunctionBegin;
1759   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1760   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
1761   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1762   PetscFunctionReturn(0);
1763 }
1764 
1765 #undef __FUNCT__
1766 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
1767 /*@C
1768    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1769    per row in the matrix. For good matrix assembly performance the
1770    user should preallocate the matrix storage by setting the parameter nz
1771    (or the array nnz).  By setting these parameters accurately, performance
1772    during matrix assembly can be increased by more than a factor of 50.
1773 
1774    Collective on MPI_Comm
1775 
1776    Input Parameters:
1777 +  A - the matrix
1778 .  bs - size of block
1779 .  nz - number of block nonzeros per block row (same for all rows)
1780 -  nnz - array containing the number of block nonzeros in the various block rows
1781          (possibly different for each block row) or PETSC_NULL
1782 
1783    Options Database Keys:
1784 .   -mat_no_unroll - uses code that does not unroll the loops in the
1785                      block calculations (much slower)
1786 .    -mat_block_size - size of the blocks to use
1787 
1788    Level: intermediate
1789 
1790    Notes:
1791    The block AIJ format is fully compatible with standard Fortran 77
1792    storage.  That is, the stored row and column indices can begin at
1793    either one (as in Fortran) or zero.  See the users' manual for details.
1794 
1795    Specify the preallocated storage with either nz or nnz (not both).
1796    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1797    allocation.  For additional details, see the users manual chapter on
1798    matrices.
1799 
1800 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1801 @*/
1802 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1803 {
1804   Mat_SeqBAIJ *b;
1805   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1806   PetscTruth  flg;
1807 
1808   PetscFunctionBegin;
1809   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr);
1810   if (!flg) PetscFunctionReturn(0);
1811 
1812   B->preallocated = PETSC_TRUE;
1813   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1814   if (nnz && newbs != bs) {
1815     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1816   }
1817   bs   = newbs;
1818 
1819   mbs  = B->m/bs;
1820   nbs  = B->n/bs;
1821   bs2  = bs*bs;
1822 
1823   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1824     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1825   }
1826 
1827   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1828   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1829   if (nnz) {
1830     for (i=0; i<mbs; i++) {
1831       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1832       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1833     }
1834   }
1835 
1836   b       = (Mat_SeqBAIJ*)B->data;
1837   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1838   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1839   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1840   if (!flg) {
1841     switch (bs) {
1842     case 1:
1843       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1844       B->ops->mult            = MatMult_SeqBAIJ_1;
1845       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1846       break;
1847     case 2:
1848       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1849       B->ops->mult            = MatMult_SeqBAIJ_2;
1850       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1851       break;
1852     case 3:
1853       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1854       B->ops->mult            = MatMult_SeqBAIJ_3;
1855       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1856       break;
1857     case 4:
1858       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1859       B->ops->mult            = MatMult_SeqBAIJ_4;
1860       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1861       break;
1862     case 5:
1863       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1864       B->ops->mult            = MatMult_SeqBAIJ_5;
1865       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1866       break;
1867     case 6:
1868       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1869       B->ops->mult            = MatMult_SeqBAIJ_6;
1870       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1871       break;
1872     case 7:
1873       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1874       B->ops->mult            = MatMult_SeqBAIJ_7;
1875       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1876       break;
1877     default:
1878       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1879       B->ops->mult            = MatMult_SeqBAIJ_N;
1880       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1881       break;
1882     }
1883   }
1884   b->bs      = bs;
1885   b->mbs     = mbs;
1886   b->nbs     = nbs;
1887   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1888   if (!nnz) {
1889     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1890     else if (nz <= 0)        nz = 1;
1891     for (i=0; i<mbs; i++) b->imax[i] = nz;
1892     nz = nz*mbs;
1893   } else {
1894     nz = 0;
1895     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1896   }
1897 
1898   /* allocate the matrix space */
1899   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1900   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1901   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1902   b->j  = (int*)(b->a + nz*bs2);
1903   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1904   b->i  = b->j + nz;
1905   b->singlemalloc = PETSC_TRUE;
1906 
1907   b->i[0] = 0;
1908   for (i=1; i<mbs+1; i++) {
1909     b->i[i] = b->i[i-1] + b->imax[i-1];
1910   }
1911 
1912   /* b->ilen will count nonzeros in each block row so far. */
1913   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1914   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1915   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1916 
1917   b->bs               = bs;
1918   b->bs2              = bs2;
1919   b->mbs              = mbs;
1920   b->nz               = 0;
1921   b->maxnz            = nz*bs2;
1922   B->info.nz_unneeded = (PetscReal)b->maxnz;
1923   PetscFunctionReturn(0);
1924 }
1925 
1926