xref: /petsc/src/mat/impls/baij/seq/baij.c (revision dffd326726a45575e28365eeacb6df6bcc7bf309)
1 /*$Id: baij.c,v 1.190 1999/11/19 20:35:39 bsmith Exp balay $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "sys.h"
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNC__
18 #define __FUNC__ "MatMissingDiagonal_SeqBAIJ"
19 int MatMissingDiagonal_SeqBAIJ(Mat A)
20 {
21   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
22   int         *diag = a->diag, *jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
26   for ( i=0; i<a->mbs; i++ ) {
27     if (jj[diag[i]] != i) {
28       SETERRQ1(1,1,"Matrix is missing diagonal number %d",i);
29     }
30   }
31   PetscFunctionReturn(0);
32 }
33 
34 #undef __FUNC__
35 #define __FUNC__ "MatMarkDiagonal_SeqBAIJ"
36 int MatMarkDiagonal_SeqBAIJ(Mat A)
37 {
38   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
39   int         i,j, *diag, m = a->mbs;
40 
41   PetscFunctionBegin;
42   diag = (int *) PetscMalloc( (m+1)*sizeof(int));CHKPTRQ(diag);
43   PLogObjectMemory(A,(m+1)*sizeof(int));
44   for ( i=0; i<m; i++ ) {
45     diag[i] = a->i[i+1];
46     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
47       if (a->j[j] == i) {
48         diag[i] = j;
49         break;
50       }
51     }
52   }
53   a->diag = diag;
54   PetscFunctionReturn(0);
55 }
56 
57 
58 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
59 
60 #undef __FUNC__
61 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
62 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
63                             PetscTruth *done)
64 {
65   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
66   int         ierr, n = a->mbs,i;
67 
68   PetscFunctionBegin;
69   *nn = n;
70   if (!ia) PetscFunctionReturn(0);
71   if (symmetric) {
72     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
73   } else if (oshift == 1) {
74     /* temporarily add 1 to i and j indices */
75     int nz = a->i[n] + 1;
76     for ( i=0; i<nz; i++ ) a->j[i]++;
77     for ( i=0; i<n+1; i++ ) a->i[i]++;
78     *ia = a->i; *ja = a->j;
79   } else {
80     *ia = a->i; *ja = a->j;
81   }
82 
83   PetscFunctionReturn(0);
84 }
85 
86 #undef __FUNC__
87 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
88 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
89                                 PetscTruth *done)
90 {
91   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
92   int         i,n = a->mbs,ierr;
93 
94   PetscFunctionBegin;
95   if (!ia) PetscFunctionReturn(0);
96   if (symmetric) {
97     ierr = PetscFree(*ia);CHKERRQ(ierr);
98     ierr = PetscFree(*ja);CHKERRQ(ierr);
99   } else if (oshift == 1) {
100     int nz = a->i[n];
101     for ( i=0; i<nz; i++ ) a->j[i]--;
102     for ( i=0; i<n+1; i++ ) a->i[i]--;
103   }
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNC__
108 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
109 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
110 {
111   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
112 
113   PetscFunctionBegin;
114   *bs = baij->bs;
115   PetscFunctionReturn(0);
116 }
117 
118 
119 #undef __FUNC__
120 #define __FUNC__ "MatDestroy_SeqBAIJ"
121 int MatDestroy_SeqBAIJ(Mat A)
122 {
123   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
124   int         ierr;
125 
126   PetscFunctionBegin;
127   if (A->mapping) {
128     ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr);
129   }
130   if (A->bmapping) {
131     ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr);
132   }
133   if (A->rmap) {
134     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
135   }
136   if (A->cmap) {
137     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
138   }
139 #if defined(PETSC_USE_LOG)
140   PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
141 #endif
142   ierr = PetscFree(a->a);CHKERRQ(ierr);
143   if (!a->singlemalloc) {
144     ierr = PetscFree(a->i);CHKERRQ(ierr);
145     ierr = PetscFree(a->j);CHKERRQ(ierr);
146   }
147   if (a->row) {
148     ierr = ISDestroy(a->row);CHKERRQ(ierr);
149   }
150   if (a->col) {
151     ierr = ISDestroy(a->col);CHKERRQ(ierr);
152   }
153   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
154   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
155   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
156   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
157   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
158   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
159   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
160   ierr = PetscFree(a);CHKERRQ(ierr);
161   PLogObjectDestroy(A);
162   PetscHeaderDestroy(A);
163   PetscFunctionReturn(0);
164 }
165 
166 #undef __FUNC__
167 #define __FUNC__ "MatSetOption_SeqBAIJ"
168 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
169 {
170   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
171 
172   PetscFunctionBegin;
173   if      (op == MAT_ROW_ORIENTED)                 a->roworiented    = PETSC_TRUE;
174   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented    = PETSC_FALSE;
175   else if (op == MAT_COLUMNS_SORTED)               a->sorted         = PETSC_TRUE;
176   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted         = PETSC_FALSE;
177   else if (op == MAT_KEEP_ZEROED_ROWS)             a->keepzeroedrows = PETSC_TRUE;
178   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew          = 1;
179   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew          = -1;
180   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew          = -2;
181   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew          = 0;
182   else if (op == MAT_ROWS_SORTED ||
183            op == MAT_ROWS_UNSORTED ||
184            op == MAT_SYMMETRIC ||
185            op == MAT_STRUCTURALLY_SYMMETRIC ||
186            op == MAT_YES_NEW_DIAGONALS ||
187            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
188            op == MAT_USE_HASH_TABLE) {
189     PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
190   } else if (op == MAT_NO_NEW_DIAGONALS) {
191     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
192   } else {
193     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
194   }
195   PetscFunctionReturn(0);
196 }
197 
198 
199 #undef __FUNC__
200 #define __FUNC__ "MatGetSize_SeqBAIJ"
201 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
202 {
203   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
204 
205   PetscFunctionBegin;
206   if (m) *m = a->m;
207   if (n) *n = a->n;
208   PetscFunctionReturn(0);
209 }
210 
211 #undef __FUNC__
212 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
213 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
214 {
215   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
216 
217   PetscFunctionBegin;
218   *m = 0; *n = a->m;
219   PetscFunctionReturn(0);
220 }
221 
222 #undef __FUNC__
223 #define __FUNC__ "MatGetRow_SeqBAIJ"
224 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
225 {
226   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ *) A->data;
227   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
228   MatScalar    *aa,*aa_i;
229   Scalar       *v_i;
230 
231   PetscFunctionBegin;
232   bs  = a->bs;
233   ai  = a->i;
234   aj  = a->j;
235   aa  = a->a;
236   bs2 = a->bs2;
237 
238   if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range");
239 
240   bn  = row/bs;   /* Block number */
241   bp  = row % bs; /* Block Position */
242   M   = ai[bn+1] - ai[bn];
243   *nz = bs*M;
244 
245   if (v) {
246     *v = 0;
247     if (*nz) {
248       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) );CHKPTRQ(*v);
249       for ( i=0; i<M; i++ ) { /* for each block in the block row */
250         v_i  = *v + i*bs;
251         aa_i = aa + bs2*(ai[bn] + i);
252         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
253       }
254     }
255   }
256 
257   if (idx) {
258     *idx = 0;
259     if (*nz) {
260       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) );CHKPTRQ(*idx);
261       for ( i=0; i<M; i++ ) { /* for each block in the block row */
262         idx_i = *idx + i*bs;
263         itmp  = bs*aj[ai[bn] + i];
264         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNC__
272 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
273 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
274 {
275   int ierr;
276 
277   PetscFunctionBegin;
278   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
279   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNC__
284 #define __FUNC__ "MatTranspose_SeqBAIJ"
285 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
286 {
287   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
288   Mat         C;
289   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
290   int         *rows,*cols,bs2=a->bs2;
291   MatScalar   *array = a->a;
292 
293   PetscFunctionBegin;
294   if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place");
295   col  = (int *) PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col);
296   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
297 
298   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
299   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
300   ierr = PetscFree(col);CHKERRQ(ierr);
301   rows = (int *) PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows);
302   cols = rows + bs;
303   for ( i=0; i<mbs; i++ ) {
304     cols[0] = i*bs;
305     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
306     len = ai[i+1] - ai[i];
307     for ( j=0; j<len; j++ ) {
308       rows[0] = (*aj++)*bs;
309       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
310       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
311       array += bs2;
312     }
313   }
314   ierr = PetscFree(rows);CHKERRQ(ierr);
315 
316   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
317   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
318 
319   if (B) {
320     *B = C;
321   } else {
322     PetscOps *Abops;
323     MatOps   Aops;
324 
325     /* This isn't really an in-place transpose */
326     ierr = PetscFree(a->a);CHKERRQ(ierr);
327     if (!a->singlemalloc) {
328       ierr = PetscFree(a->i);CHKERRQ(ierr);
329       ierr = PetscFree(a->j);CHKERRQ(ierr);
330     }
331     if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
332     if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
333     if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
334     if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
335     ierr = PetscFree(a);CHKERRQ(ierr);
336 
337 
338     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
339     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
340 
341     /*
342        This is horrible, horrible code. We need to keep the
343       A pointers for the bops and ops but copy everything
344       else from C.
345     */
346     Abops    = A->bops;
347     Aops     = A->ops;
348     ierr     = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr);
349     A->bops  = Abops;
350     A->ops   = Aops;
351     A->qlist = 0;
352 
353     PetscHeaderDestroy(C);
354   }
355   PetscFunctionReturn(0);
356 }
357 
358 #undef __FUNC__
359 #define __FUNC__ "MatView_SeqBAIJ_Binary"
360 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
361 {
362   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
363   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
364   Scalar      *aa;
365   FILE        *file;
366 
367   PetscFunctionBegin;
368   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
369   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
370   col_lens[0] = MAT_COOKIE;
371 
372   col_lens[1] = a->m;
373   col_lens[2] = a->n;
374   col_lens[3] = a->nz*bs2;
375 
376   /* store lengths of each row and write (including header) to file */
377   count = 0;
378   for ( i=0; i<a->mbs; i++ ) {
379     for ( j=0; j<bs; j++ ) {
380       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
381     }
382   }
383   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr);
384   ierr = PetscFree(col_lens);CHKERRQ(ierr);
385 
386   /* store column indices (zero start index) */
387   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) );CHKPTRQ(jj);
388   count = 0;
389   for ( i=0; i<a->mbs; i++ ) {
390     for ( j=0; j<bs; j++ ) {
391       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
392         for ( l=0; l<bs; l++ ) {
393           jj[count++] = bs*a->j[k] + l;
394         }
395       }
396     }
397   }
398   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
399   ierr = PetscFree(jj);CHKERRQ(ierr);
400 
401   /* store nonzero values */
402   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa);
403   count = 0;
404   for ( i=0; i<a->mbs; i++ ) {
405     for ( j=0; j<bs; j++ ) {
406       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
407         for ( l=0; l<bs; l++ ) {
408           aa[count++] = a->a[bs2*k + l*bs + j];
409         }
410       }
411     }
412   }
413   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
414   ierr = PetscFree(aa);CHKERRQ(ierr);
415 
416   ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
417   if (file) {
418     fprintf(file,"-matload_block_size %d\n",a->bs);
419   }
420   PetscFunctionReturn(0);
421 }
422 
423 #undef __FUNC__
424 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
425 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
426 {
427   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
428   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
429   char        *outputname;
430 
431   PetscFunctionBegin;
432   ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr);
433   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
434   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
435     ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
436   } else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
437     SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported");
438   } else if (format == VIEWER_FORMAT_ASCII_COMMON) {
439     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
440     for ( i=0; i<a->mbs; i++ ) {
441       for ( j=0; j<bs; j++ ) {
442         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
443         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
444           for ( l=0; l<bs; l++ ) {
445 #if defined(PETSC_USE_COMPLEX)
446             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
447               ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
448                       PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
449             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
450               ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
451                       PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
452             } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
453               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
454             }
455 #else
456             if (a->a[bs2*k + l*bs + j] != 0.0) {
457               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
458             }
459 #endif
460           }
461         }
462         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
463       }
464     }
465     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
466   } else {
467     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
468     for ( i=0; i<a->mbs; i++ ) {
469       for ( j=0; j<bs; j++ ) {
470         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
471         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
472           for ( l=0; l<bs; l++ ) {
473 #if defined(PETSC_USE_COMPLEX)
474             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) {
475               ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
476                 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
477             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) {
478               ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
479                 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
480             } else {
481               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
482             }
483 #else
484             ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
485 #endif
486           }
487         }
488         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
489       }
490     }
491     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
492   }
493   ierr = ViewerFlush(viewer);CHKERRQ(ierr);
494   PetscFunctionReturn(0);
495 }
496 
497 #undef __FUNC__
498 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom"
499 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa)
500 {
501   Mat          A = (Mat) Aa;
502   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
503   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
504   double       xl,yl,xr,yr,x_l,x_r,y_l,y_r;
505   MatScalar    *aa;
506   MPI_Comm     comm;
507   Viewer       viewer;
508 
509   PetscFunctionBegin;
510   /*
511       This is nasty. If this is called from an originally parallel matrix
512    then all processes call this, but only the first has the matrix so the
513    rest should return immediately.
514   */
515   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
516   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
517   if (rank) PetscFunctionReturn(0);
518 
519   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr);
520 
521   ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
522 
523   /* loop over matrix elements drawing boxes */
524   color = DRAW_BLUE;
525   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
526     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
527       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
528       x_l = a->j[j]*bs; x_r = x_l + 1.0;
529       aa = a->a + j*bs2;
530       for ( k=0; k<bs; k++ ) {
531         for ( l=0; l<bs; l++ ) {
532           if (PetscReal(*aa++) >=  0.) continue;
533           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
534         }
535       }
536     }
537   }
538   color = DRAW_CYAN;
539   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
540     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
541       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
542       x_l = a->j[j]*bs; x_r = x_l + 1.0;
543       aa = a->a + j*bs2;
544       for ( k=0; k<bs; k++ ) {
545         for ( l=0; l<bs; l++ ) {
546           if (PetscReal(*aa++) != 0.) continue;
547           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
548         }
549       }
550     }
551   }
552 
553   color = DRAW_RED;
554   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
555     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
556       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
557       x_l = a->j[j]*bs; x_r = x_l + 1.0;
558       aa = a->a + j*bs2;
559       for ( k=0; k<bs; k++ ) {
560         for ( l=0; l<bs; l++ ) {
561           if (PetscReal(*aa++) <= 0.) continue;
562           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
563         }
564       }
565     }
566   }
567   PetscFunctionReturn(0);
568 }
569 
570 #undef __FUNC__
571 #define __FUNC__ "MatView_SeqBAIJ_Draw"
572 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
573 {
574   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
575   int          ierr;
576   double       xl,yl,xr,yr,w,h;
577   Draw         draw;
578   PetscTruth   isnull;
579 
580   PetscFunctionBegin;
581 
582   ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
583   ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
584 
585   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
586   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
587   xr += w;    yr += h;  xl = -w;     yl = -h;
588   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
589   ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
590   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
591   PetscFunctionReturn(0);
592 }
593 
594 #undef __FUNC__
595 #define __FUNC__ "MatView_SeqBAIJ"
596 int MatView_SeqBAIJ(Mat A,Viewer viewer)
597 {
598   int        ierr;
599   PetscTruth issocket,isascii,isbinary,isdraw;
600 
601   PetscFunctionBegin;
602   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
603   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
604   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
605   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
606   if (issocket) {
607     SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported");
608   } else if (isascii){
609     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
610   } else if (isbinary) {
611     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
612   } else if (isdraw) {
613     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
614   } else {
615     SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
616   }
617   PetscFunctionReturn(0);
618 }
619 
620 
621 #undef __FUNC__
622 #define __FUNC__ "MatGetValues_SeqBAIJ"
623 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
624 {
625   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
626   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
627   int        *ai = a->i, *ailen = a->ilen;
628   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
629   MatScalar  *ap, *aa = a->a, zero = 0.0;
630 
631   PetscFunctionBegin;
632   for ( k=0; k<m; k++ ) { /* loop over rows */
633     row  = im[k]; brow = row/bs;
634     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
635     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
636     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
637     nrow = ailen[brow];
638     for ( l=0; l<n; l++ ) { /* loop over columns */
639       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
640       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
641       col  = in[l] ;
642       bcol = col/bs;
643       cidx = col%bs;
644       ridx = row%bs;
645       high = nrow;
646       low  = 0; /* assume unsorted */
647       while (high-low > 5) {
648         t = (low+high)/2;
649         if (rp[t] > bcol) high = t;
650         else             low  = t;
651       }
652       for ( i=low; i<high; i++ ) {
653         if (rp[i] > bcol) break;
654         if (rp[i] == bcol) {
655           *v++ = ap[bs2*i+bs*cidx+ridx];
656           goto finished;
657         }
658       }
659       *v++ = zero;
660       finished:;
661     }
662   }
663   PetscFunctionReturn(0);
664 }
665 
666 
667 #undef __FUNC__
668 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
669 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
670 {
671   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
672   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
673   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented;
674   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
675   Scalar      *value = v;
676   MatScalar   *ap,*aa=a->a,*bap;
677 
678   PetscFunctionBegin;
679   if (roworiented) {
680     stepval = (n-1)*bs;
681   } else {
682     stepval = (m-1)*bs;
683   }
684   for ( k=0; k<m; k++ ) { /* loop over added rows */
685     row  = im[k];
686     if (row < 0) continue;
687 #if defined(PETSC_USE_BOPT_g)
688     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
689 #endif
690     rp   = aj + ai[row];
691     ap   = aa + bs2*ai[row];
692     rmax = imax[row];
693     nrow = ailen[row];
694     low  = 0;
695     for ( l=0; l<n; l++ ) { /* loop over added columns */
696       if (in[l] < 0) continue;
697 #if defined(PETSC_USE_BOPT_g)
698       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
699 #endif
700       col = in[l];
701       if (roworiented) {
702         value = v + k*(stepval+bs)*bs + l*bs;
703       } else {
704         value = v + l*(stepval+bs)*bs + k*bs;
705       }
706       if (!sorted) low = 0; high = nrow;
707       while (high-low > 7) {
708         t = (low+high)/2;
709         if (rp[t] > col) high = t;
710         else             low  = t;
711       }
712       for ( i=low; i<high; i++ ) {
713         if (rp[i] > col) break;
714         if (rp[i] == col) {
715           bap  = ap +  bs2*i;
716           if (roworiented) {
717             if (is == ADD_VALUES) {
718               for ( ii=0; ii<bs; ii++,value+=stepval ) {
719                 for (jj=ii; jj<bs2; jj+=bs ) {
720                   bap[jj] += *value++;
721                 }
722               }
723             } else {
724               for ( ii=0; ii<bs; ii++,value+=stepval ) {
725                 for (jj=ii; jj<bs2; jj+=bs ) {
726                   bap[jj] = *value++;
727                 }
728               }
729             }
730           } else {
731             if (is == ADD_VALUES) {
732               for ( ii=0; ii<bs; ii++,value+=stepval ) {
733                 for (jj=0; jj<bs; jj++ ) {
734                   *bap++ += *value++;
735                 }
736               }
737             } else {
738               for ( ii=0; ii<bs; ii++,value+=stepval ) {
739                 for (jj=0; jj<bs; jj++ ) {
740                   *bap++  = *value++;
741                 }
742               }
743             }
744           }
745           goto noinsert2;
746         }
747       }
748       if (nonew == 1) goto noinsert2;
749       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
750       if (nrow >= rmax) {
751         /* there is no extra room in row, therefore enlarge */
752         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
753         MatScalar *new_a;
754 
755         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
756 
757         /* malloc new storage space */
758         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
759         new_a   = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a);
760         new_j   = (int *) (new_a + bs2*new_nz);
761         new_i   = new_j + new_nz;
762 
763         /* copy over old data into new slots */
764         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
765         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
766         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
767         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
768         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
769         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
770         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
771         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
772         /* free up old matrix storage */
773         ierr = PetscFree(a->a);CHKERRQ(ierr);
774         if (!a->singlemalloc) {
775           ierr = PetscFree(a->i);CHKERRQ(ierr);
776           ierr = PetscFree(a->j);CHKERRQ(ierr);
777         }
778         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
779         a->singlemalloc = PETSC_TRUE;
780 
781         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
782         rmax = imax[row] = imax[row] + CHUNKSIZE;
783         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
784         a->maxnz += bs2*CHUNKSIZE;
785         a->reallocs++;
786         a->nz++;
787       }
788       N = nrow++ - 1;
789       /* shift up all the later entries in this row */
790       for ( ii=N; ii>=i; ii-- ) {
791         rp[ii+1] = rp[ii];
792         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
793       }
794       if (N >= i) {
795         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
796       }
797       rp[i] = col;
798       bap   = ap +  bs2*i;
799       if (roworiented) {
800         for ( ii=0; ii<bs; ii++,value+=stepval) {
801           for (jj=ii; jj<bs2; jj+=bs ) {
802             bap[jj] = *value++;
803           }
804         }
805       } else {
806         for ( ii=0; ii<bs; ii++,value+=stepval ) {
807           for (jj=0; jj<bs; jj++ ) {
808             *bap++  = *value++;
809           }
810         }
811       }
812       noinsert2:;
813       low = i;
814     }
815     ailen[row] = nrow;
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 
821 #undef __FUNC__
822 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
823 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
824 {
825   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
826   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
827   int        m = a->m,*ip, N, *ailen = a->ilen;
828   int        mbs = a->mbs, bs2 = a->bs2,rmax = 0,ierr;
829   MatScalar  *aa = a->a, *ap;
830 
831   PetscFunctionBegin;
832   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
833 
834   if (m) rmax = ailen[0];
835   for ( i=1; i<mbs; i++ ) {
836     /* move each row back by the amount of empty slots (fshift) before it*/
837     fshift += imax[i-1] - ailen[i-1];
838     rmax   = PetscMax(rmax,ailen[i]);
839     if (fshift) {
840       ip = aj + ai[i]; ap = aa + bs2*ai[i];
841       N = ailen[i];
842       for ( j=0; j<N; j++ ) {
843         ip[j-fshift] = ip[j];
844         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
845       }
846     }
847     ai[i] = ai[i-1] + ailen[i-1];
848   }
849   if (mbs) {
850     fshift += imax[mbs-1] - ailen[mbs-1];
851     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
852   }
853   /* reset ilen and imax for each row */
854   for ( i=0; i<mbs; i++ ) {
855     ailen[i] = imax[i] = ai[i+1] - ai[i];
856   }
857   a->nz = ai[mbs];
858 
859   /* diagonals may have moved, so kill the diagonal pointers */
860   if (fshift && a->diag) {
861     ierr = PetscFree(a->diag);CHKERRQ(ierr);
862     PLogObjectMemory(A,-(m+1)*sizeof(int));
863     a->diag = 0;
864   }
865   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
866            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
867   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
868            a->reallocs);
869   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
870   a->reallocs          = 0;
871   A->info.nz_unneeded  = (double)fshift*bs2;
872 
873   PetscFunctionReturn(0);
874 }
875 
876 
877 
878 /*
879    This function returns an array of flags which indicate the locations of contiguous
880    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
881    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
882    Assume: sizes should be long enough to hold all the values.
883 */
884 #undef __FUNC__
885 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks"
886 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
887 {
888   int        i,j,k,row;
889   PetscTruth flg;
890 
891   PetscFunctionBegin;
892   for ( i=0,j=0; i<n; j++ ) {
893     row = idx[i];
894     if (row%bs!=0) { /* Not the begining of a block */
895       sizes[j] = 1;
896       i++;
897     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
898       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
899       i++;
900     } else { /* Begining of the block, so check if the complete block exists */
901       flg = PETSC_TRUE;
902       for ( k=1; k<bs; k++ ) {
903         if (row+k != idx[i+k]) { /* break in the block */
904           flg = PETSC_FALSE;
905           break;
906         }
907       }
908       if (flg == PETSC_TRUE) { /* No break in the bs */
909         sizes[j] = bs;
910         i+= bs;
911       } else {
912         sizes[j] = 1;
913         i++;
914       }
915     }
916   }
917   *bs_max = j;
918   PetscFunctionReturn(0);
919 }
920 
921 #undef __FUNC__
922 #define __FUNC__ "MatZeroRows_SeqBAIJ"
923 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
924 {
925   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
926   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
927   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
928   Scalar      zero = 0.0;
929   MatScalar   *aa;
930 
931   PetscFunctionBegin;
932   /* Make a copy of the IS and  sort it */
933   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
934   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
935 
936   /* allocate memory for rows,sizes */
937   rows  = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows);
938   sizes = rows + is_n;
939 
940   /* initialize copy IS valurs to rows, and sort them */
941   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
942   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
943   if (baij->keepzeroedrows) {
944     for ( i=0; i<is_n; i++ ) { sizes[i] = 1; }
945     bs_max = is_n;
946   } else {
947     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
948   }
949   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
950 
951   for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) {
952     row   = rows[j];
953     if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row);
954     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
955     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
956     if (sizes[i] == bs && !baij->keepzeroedrows) {
957       if (diag) {
958         if (baij->ilen[row/bs] > 0) {
959           baij->ilen[row/bs] = 1;
960           baij->j[baij->i[row/bs]] = row/bs;
961           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
962         }
963         /* Now insert all the diagoanl values for this bs */
964         for ( k=0; k<bs; k++ ) {
965           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
966         }
967       } else { /* (!diag) */
968         baij->ilen[row/bs] = 0;
969       } /* end (!diag) */
970     } else { /* (sizes[i] != bs) */
971 #if defined (PETSC_USE_DEBUG)
972       if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1");
973 #endif
974       for ( k=0; k<count; k++ ) {
975         aa[0] = zero;
976         aa+=bs;
977       }
978       if (diag) {
979         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
980       }
981     }
982   }
983 
984   ierr = PetscFree(rows);CHKERRQ(ierr);
985   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
986   PetscFunctionReturn(0);
987 }
988 
989 #undef __FUNC__
990 #define __FUNC__ "MatSetValues_SeqBAIJ"
991 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
992 {
993   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
994   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
995   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
996   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
997   int         ridx,cidx,bs2=a->bs2,ierr;
998   MatScalar   *ap,value,*aa=a->a,*bap;
999 
1000   PetscFunctionBegin;
1001   for ( k=0; k<m; k++ ) { /* loop over added rows */
1002     row  = im[k]; brow = row/bs;
1003     if (row < 0) continue;
1004 #if defined(PETSC_USE_BOPT_g)
1005     if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m);
1006 #endif
1007     rp   = aj + ai[brow];
1008     ap   = aa + bs2*ai[brow];
1009     rmax = imax[brow];
1010     nrow = ailen[brow];
1011     low  = 0;
1012     for ( l=0; l<n; l++ ) { /* loop over added columns */
1013       if (in[l] < 0) continue;
1014 #if defined(PETSC_USE_BOPT_g)
1015       if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n);
1016 #endif
1017       col = in[l]; bcol = col/bs;
1018       ridx = row % bs; cidx = col % bs;
1019       if (roworiented) {
1020         value = v[l + k*n];
1021       } else {
1022         value = v[k + l*m];
1023       }
1024       if (!sorted) low = 0; high = nrow;
1025       while (high-low > 7) {
1026         t = (low+high)/2;
1027         if (rp[t] > bcol) high = t;
1028         else              low  = t;
1029       }
1030       for ( i=low; i<high; i++ ) {
1031         if (rp[i] > bcol) break;
1032         if (rp[i] == bcol) {
1033           bap  = ap +  bs2*i + bs*cidx + ridx;
1034           if (is == ADD_VALUES) *bap += value;
1035           else                  *bap  = value;
1036           goto noinsert1;
1037         }
1038       }
1039       if (nonew == 1) goto noinsert1;
1040       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1041       if (nrow >= rmax) {
1042         /* there is no extra room in row, therefore enlarge */
1043         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1044         MatScalar *new_a;
1045 
1046         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1047 
1048         /* Malloc new storage space */
1049         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1050         new_a   = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a);
1051         new_j   = (int *) (new_a + bs2*new_nz);
1052         new_i   = new_j + new_nz;
1053 
1054         /* copy over old data into new slots */
1055         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
1056         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1057         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1058         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1059         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1060         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1061         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1062         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1063         /* free up old matrix storage */
1064         ierr = PetscFree(a->a);CHKERRQ(ierr);
1065         if (!a->singlemalloc) {
1066           ierr = PetscFree(a->i);CHKERRQ(ierr);
1067           ierr = PetscFree(a->j);CHKERRQ(ierr);
1068         }
1069         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1070         a->singlemalloc = PETSC_TRUE;
1071 
1072         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1073         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1074         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1075         a->maxnz += bs2*CHUNKSIZE;
1076         a->reallocs++;
1077         a->nz++;
1078       }
1079       N = nrow++ - 1;
1080       /* shift up all the later entries in this row */
1081       for ( ii=N; ii>=i; ii-- ) {
1082         rp[ii+1] = rp[ii];
1083         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1084       }
1085       if (N>=i) {
1086         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1087       }
1088       rp[i]                      = bcol;
1089       ap[bs2*i + bs*cidx + ridx] = value;
1090       noinsert1:;
1091       low = i;
1092     }
1093     ailen[brow] = nrow;
1094   }
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1099 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1100 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1101 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1102 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1103 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec);
1104 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1105 extern int MatScale_SeqBAIJ(Scalar*,Mat);
1106 extern int MatNorm_SeqBAIJ(Mat,NormType,double *);
1107 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*);
1108 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1109 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1110 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1111 extern int MatZeroEntries_SeqBAIJ(Mat);
1112 
1113 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1114 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1115 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1116 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1117 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1118 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1119 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
1120 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1121 extern int MatSolveTrans_SeqBAIJ_7(Mat,Vec,Vec);
1122 extern int MatSolveTrans_SeqBAIJ_6(Mat,Vec,Vec);
1123 extern int MatSolveTrans_SeqBAIJ_5(Mat,Vec,Vec);
1124 extern int MatSolveTrans_SeqBAIJ_4(Mat,Vec,Vec);
1125 extern int MatSolveTrans_SeqBAIJ_3(Mat,Vec,Vec);
1126 extern int MatSolveTrans_SeqBAIJ_2(Mat,Vec,Vec);
1127 extern int MatSolveTrans_SeqBAIJ_1(Mat,Vec,Vec);
1128 
1129 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1130 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1131 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1132 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1133 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1134 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1135 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*);
1136 
1137 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1138 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1139 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1140 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1141 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1142 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec);
1143 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1144 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1145 
1146 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1147 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1148 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1149 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1150 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1151 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
1152 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1153 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1154 
1155 #undef __FUNC__
1156 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1157 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1158 {
1159   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1160   Mat         outA;
1161   int         ierr;
1162   PetscTruth  row_identity, col_identity;
1163 
1164   PetscFunctionBegin;
1165   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU");
1166   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1167   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1168   if (!row_identity || !col_identity) {
1169     SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU");
1170   }
1171 
1172   outA          = inA;
1173   inA->factor   = FACTOR_LU;
1174 
1175   if (!a->diag) {
1176     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1177   }
1178   /*
1179       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1180       for ILU(0) factorization with natural ordering
1181   */
1182   switch (a->bs) {
1183   case 1:
1184     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_2_NaturalOrdering;
1185     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1186   case 2:
1187     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1188     inA->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1189     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_2_NaturalOrdering;
1190     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1191     break;
1192   case 3:
1193     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1194     inA->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1195     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_3_NaturalOrdering;
1196     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1197     break;
1198   case 4:
1199     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1200     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1201     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_4_NaturalOrdering;
1202     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1203     break;
1204   case 5:
1205     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1206     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1207     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_5_NaturalOrdering;
1208     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1209     break;
1210   case 6:
1211     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1212     inA->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1213     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_6_NaturalOrdering;
1214     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1215     break;
1216   case 7:
1217     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1218     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_7_NaturalOrdering;
1219     inA->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1220     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1221     break;
1222   default:
1223     a->row        = row;
1224     a->col        = col;
1225     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1226     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1227 
1228     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1229     ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr);
1230     PLogObjectParent(inA,a->icol);
1231 
1232     if (!a->solve_work) {
1233       a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1234       PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1235     }
1236   }
1237 
1238   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1239 
1240   PetscFunctionReturn(0);
1241 }
1242 #undef __FUNC__
1243 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1244 int MatPrintHelp_SeqBAIJ(Mat A)
1245 {
1246   static PetscTruth called = PETSC_FALSE;
1247   MPI_Comm          comm = A->comm;
1248   int               ierr;
1249 
1250   PetscFunctionBegin;
1251   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1252   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1253   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 EXTERN_C_BEGIN
1258 #undef __FUNC__
1259 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1260 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1261 {
1262   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1263   int         i,nz,n;
1264 
1265   PetscFunctionBegin;
1266   nz = baij->maxnz;
1267   n  = baij->n;
1268   for (i=0; i<nz; i++) {
1269     baij->j[i] = indices[i];
1270   }
1271   baij->nz = nz;
1272   for ( i=0; i<n; i++ ) {
1273     baij->ilen[i] = baij->imax[i];
1274   }
1275 
1276   PetscFunctionReturn(0);
1277 }
1278 EXTERN_C_END
1279 
1280 #undef __FUNC__
1281 #define __FUNC__ "MatSeqBAIJSetColumnIndices"
1282 /*@
1283     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1284        in the matrix.
1285 
1286   Input Parameters:
1287 +  mat - the SeqBAIJ matrix
1288 -  indices - the column indices
1289 
1290   Level: advanced
1291 
1292   Notes:
1293     This can be called if you have precomputed the nonzero structure of the
1294   matrix and want to provide it to the matrix object to improve the performance
1295   of the MatSetValues() operation.
1296 
1297     You MUST have set the correct numbers of nonzeros per row in the call to
1298   MatCreateSeqBAIJ().
1299 
1300     MUST be called before any calls to MatSetValues();
1301 
1302 @*/
1303 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1304 {
1305   int ierr,(*f)(Mat,int *);
1306 
1307   PetscFunctionBegin;
1308   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1309   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr);
1310   if (f) {
1311     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1312   } else {
1313     SETERRQ(1,1,"Wrong type of matrix to set column indices");
1314   }
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 /* -------------------------------------------------------------------*/
1319 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1320        MatGetRow_SeqBAIJ,
1321        MatRestoreRow_SeqBAIJ,
1322        MatMult_SeqBAIJ_N,
1323        MatMultAdd_SeqBAIJ_N,
1324        MatMultTrans_SeqBAIJ,
1325        MatMultTransAdd_SeqBAIJ,
1326        MatSolve_SeqBAIJ_N,
1327        0,
1328        0,
1329        0,
1330        MatLUFactor_SeqBAIJ,
1331        0,
1332        0,
1333        MatTranspose_SeqBAIJ,
1334        MatGetInfo_SeqBAIJ,
1335        MatEqual_SeqBAIJ,
1336        MatGetDiagonal_SeqBAIJ,
1337        MatDiagonalScale_SeqBAIJ,
1338        MatNorm_SeqBAIJ,
1339        0,
1340        MatAssemblyEnd_SeqBAIJ,
1341        0,
1342        MatSetOption_SeqBAIJ,
1343        MatZeroEntries_SeqBAIJ,
1344        MatZeroRows_SeqBAIJ,
1345        MatLUFactorSymbolic_SeqBAIJ,
1346        MatLUFactorNumeric_SeqBAIJ_N,
1347        0,
1348        0,
1349        MatGetSize_SeqBAIJ,
1350        MatGetSize_SeqBAIJ,
1351        MatGetOwnershipRange_SeqBAIJ,
1352        MatILUFactorSymbolic_SeqBAIJ,
1353        0,
1354        0,
1355        0,
1356        MatDuplicate_SeqBAIJ,
1357        0,
1358        0,
1359        MatILUFactor_SeqBAIJ,
1360        0,
1361        0,
1362        MatGetSubMatrices_SeqBAIJ,
1363        MatIncreaseOverlap_SeqBAIJ,
1364        MatGetValues_SeqBAIJ,
1365        0,
1366        MatPrintHelp_SeqBAIJ,
1367        MatScale_SeqBAIJ,
1368        0,
1369        0,
1370        0,
1371        MatGetBlockSize_SeqBAIJ,
1372        MatGetRowIJ_SeqBAIJ,
1373        MatRestoreRowIJ_SeqBAIJ,
1374        0,
1375        0,
1376        0,
1377        0,
1378        0,
1379        0,
1380        MatSetValuesBlocked_SeqBAIJ,
1381        MatGetSubMatrix_SeqBAIJ,
1382        0,
1383        0,
1384        MatGetMaps_Petsc};
1385 
1386 EXTERN_C_BEGIN
1387 #undef __FUNC__
1388 #define __FUNC__ "MatStoreValues_SeqBAIJ"
1389 int MatStoreValues_SeqBAIJ(Mat mat)
1390 {
1391   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1392   int         nz = aij->i[aij->m]*aij->bs*aij->bs2;
1393   int         ierr;
1394 
1395   PetscFunctionBegin;
1396   if (aij->nonew != 1) {
1397     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1398   }
1399 
1400   /* allocate space for values if not already there */
1401   if (!aij->saved_values) {
1402     aij->saved_values = (Scalar *) PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values);
1403   }
1404 
1405   /* copy values over */
1406   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1407   PetscFunctionReturn(0);
1408 }
1409 EXTERN_C_END
1410 
1411 EXTERN_C_BEGIN
1412 #undef __FUNC__
1413 #define __FUNC__ "MatRetrieveValues_SeqBAIJ"
1414 int MatRetrieveValues_SeqBAIJ(Mat mat)
1415 {
1416   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1417   int         nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr;
1418 
1419   PetscFunctionBegin;
1420   if (aij->nonew != 1) {
1421     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1422   }
1423   if (!aij->saved_values) {
1424     SETERRQ(1,1,"Must call MatStoreValues(A);first");
1425   }
1426 
1427   /* copy values over */
1428   ierr = PetscMemcpy(aij->a, aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1429   PetscFunctionReturn(0);
1430 }
1431 EXTERN_C_END
1432 
1433 #undef __FUNC__
1434 #define __FUNC__ "MatCreateSeqBAIJ"
1435 /*@C
1436    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1437    compressed row) format.  For good matrix assembly performance the
1438    user should preallocate the matrix storage by setting the parameter nz
1439    (or the array nnz).  By setting these parameters accurately, performance
1440    during matrix assembly can be increased by more than a factor of 50.
1441 
1442    Collective on MPI_Comm
1443 
1444    Input Parameters:
1445 +  comm - MPI communicator, set to PETSC_COMM_SELF
1446 .  bs - size of block
1447 .  m - number of rows
1448 .  n - number of columns
1449 .  nz - number of block nonzeros per block row (same for all rows)
1450 -  nnz - array containing the number of block nonzeros in the various block rows
1451          (possibly different for each block row) or PETSC_NULL
1452 
1453    Output Parameter:
1454 .  A - the matrix
1455 
1456    Options Database Keys:
1457 .   -mat_no_unroll - uses code that does not unroll the loops in the
1458                      block calculations (much slower)
1459 .    -mat_block_size - size of the blocks to use
1460 
1461    Level: intermediate
1462 
1463    Notes:
1464    The block AIJ format is fully compatible with standard Fortran 77
1465    storage.  That is, the stored row and column indices can begin at
1466    either one (as in Fortran) or zero.  See the users' manual for details.
1467 
1468    Specify the preallocated storage with either nz or nnz (not both).
1469    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1470    allocation.  For additional details, see the users manual chapter on
1471    matrices.
1472 
1473 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1474 @*/
1475 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1476 {
1477   Mat         B;
1478   Mat_SeqBAIJ *b;
1479   int         i,len,ierr,mbs,nbs,bs2,size;
1480   PetscTruth  flg;
1481 
1482   PetscFunctionBegin;
1483   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1484   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1");
1485 
1486   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1487   mbs  = m/bs;
1488   nbs  = n/bs;
1489   bs2  = bs*bs;
1490 
1491   if (mbs*bs!=m || nbs*bs!=n) {
1492     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize");
1493   }
1494 
1495   if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz);
1496   if (nnz) {
1497     for (i=0; i<mbs; i++) {
1498       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1499     }
1500   }
1501 
1502   *A = 0;
1503   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView);
1504   PLogObjectCreate(B);
1505   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b);
1506   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1507   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1508   ierr    = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1509   if (!flg) {
1510     switch (bs) {
1511     case 1:
1512       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1513       B->ops->solve           = MatSolve_SeqBAIJ_1;
1514       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_1;
1515       B->ops->mult            = MatMult_SeqBAIJ_1;
1516       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1517       break;
1518     case 2:
1519       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1520       B->ops->solve           = MatSolve_SeqBAIJ_2;
1521       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_2;
1522       B->ops->mult            = MatMult_SeqBAIJ_2;
1523       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1524       break;
1525     case 3:
1526       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1527       B->ops->solve           = MatSolve_SeqBAIJ_3;
1528       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_3;
1529       B->ops->mult            = MatMult_SeqBAIJ_3;
1530       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1531       break;
1532     case 4:
1533       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1534       B->ops->solve           = MatSolve_SeqBAIJ_4;
1535       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_4;
1536       B->ops->mult            = MatMult_SeqBAIJ_4;
1537       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1538       break;
1539     case 5:
1540       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1541       B->ops->solve           = MatSolve_SeqBAIJ_5;
1542       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_5;
1543       B->ops->mult            = MatMult_SeqBAIJ_5;
1544       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1545       break;
1546     case 6:
1547       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1548       B->ops->solve           = MatSolve_SeqBAIJ_6;
1549       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_6;
1550       B->ops->mult            = MatMult_SeqBAIJ_6;
1551       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1552       break;
1553     case 7:
1554       B->ops->mult            = MatMult_SeqBAIJ_7;
1555       B->ops->solve           = MatSolve_SeqBAIJ_7;
1556       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_7;
1557       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1558       break;
1559     }
1560   }
1561   B->ops->destroy     = MatDestroy_SeqBAIJ;
1562   B->ops->view        = MatView_SeqBAIJ;
1563   B->factor           = 0;
1564   B->lupivotthreshold = 1.0;
1565   B->mapping          = 0;
1566   b->row              = 0;
1567   b->col              = 0;
1568   b->icol             = 0;
1569   b->reallocs         = 0;
1570   b->saved_values     = 0;
1571 
1572   b->m       = m; B->m = m; B->M = m;
1573   b->n       = n; B->n = n; B->N = n;
1574 
1575   ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr);
1576   ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr);
1577 
1578   b->mbs     = mbs;
1579   b->nbs     = nbs;
1580   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(b->imax);
1581   if (!nnz) {
1582     if (nz == PETSC_DEFAULT) nz = 5;
1583     else if (nz <= 0)        nz = 1;
1584     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1585     nz = nz*mbs;
1586   } else {
1587     nz = 0;
1588     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1589   }
1590 
1591   /* allocate the matrix space */
1592   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int);
1593   b->a  = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a);
1594   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1595   b->j  = (int *) (b->a + nz*bs2);
1596   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1597   b->i  = b->j + nz;
1598   b->singlemalloc = PETSC_TRUE;
1599 
1600   b->i[0] = 0;
1601   for (i=1; i<mbs+1; i++) {
1602     b->i[i] = b->i[i-1] + b->imax[i-1];
1603   }
1604 
1605   /* b->ilen will count nonzeros in each block row so far. */
1606   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1607   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1608   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1609 
1610   b->bs               = bs;
1611   b->bs2              = bs2;
1612   b->mbs              = mbs;
1613   b->nz               = 0;
1614   b->maxnz            = nz*bs2;
1615   b->sorted           = PETSC_FALSE;
1616   b->roworiented      = PETSC_TRUE;
1617   b->nonew            = 0;
1618   b->diag             = 0;
1619   b->solve_work       = 0;
1620   b->mult_work        = 0;
1621   b->spptr            = 0;
1622   B->info.nz_unneeded = (double)b->maxnz;
1623 
1624   *A = B;
1625   ierr = OptionsHasName(PETSC_NULL,"-help", &flg);CHKERRQ(ierr);
1626   if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); }
1627 
1628   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1629                                      "MatStoreValues_SeqBAIJ",
1630                                      (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1631   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1632                                      "MatRetrieveValues_SeqBAIJ",
1633                                      (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1634   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1635                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1636                                      (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1637   PetscFunctionReturn(0);
1638 }
1639 
1640 #undef __FUNC__
1641 #define __FUNC__ "MatDuplicate_SeqBAIJ"
1642 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1643 {
1644   Mat         C;
1645   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1646   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1647 
1648   PetscFunctionBegin;
1649   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix");
1650 
1651   *B = 0;
1652   PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView);
1653   PLogObjectCreate(C);
1654   C->data         = (void *) (c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c);
1655   ierr            = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1656   C->ops->destroy = MatDestroy_SeqBAIJ;
1657   C->ops->view    = MatView_SeqBAIJ;
1658   C->factor       = A->factor;
1659   c->row          = 0;
1660   c->col          = 0;
1661   c->icol         = 0;
1662   c->saved_values = 0;
1663   C->assembled    = PETSC_TRUE;
1664 
1665   c->m = C->m   = a->m;
1666   c->n = C->n   = a->n;
1667   C->M          = a->m;
1668   C->N          = a->n;
1669 
1670   c->bs         = a->bs;
1671   c->bs2        = a->bs2;
1672   c->mbs        = a->mbs;
1673   c->nbs        = a->nbs;
1674 
1675   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax);
1676   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen);
1677   for ( i=0; i<mbs; i++ ) {
1678     c->imax[i] = a->imax[i];
1679     c->ilen[i] = a->ilen[i];
1680   }
1681 
1682   /* allocate the matrix space */
1683   c->singlemalloc = PETSC_TRUE;
1684   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1685   c->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(c->a);
1686   c->j = (int *) (c->a + nz*bs2);
1687   c->i = c->j + nz;
1688   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1689   if (mbs > 0) {
1690     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1691     if (cpvalues == MAT_COPY_VALUES) {
1692       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1693     } else {
1694       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1695     }
1696   }
1697 
1698   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1699   c->sorted      = a->sorted;
1700   c->roworiented = a->roworiented;
1701   c->nonew       = a->nonew;
1702 
1703   if (a->diag) {
1704     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(c->diag);
1705     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1706     for ( i=0; i<mbs; i++ ) {
1707       c->diag[i] = a->diag[i];
1708     }
1709   } else c->diag        = 0;
1710   c->nz                 = a->nz;
1711   c->maxnz              = a->maxnz;
1712   c->solve_work         = 0;
1713   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1714   c->mult_work          = 0;
1715   *B = C;
1716   ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 #undef __FUNC__
1721 #define __FUNC__ "MatLoad_SeqBAIJ"
1722 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1723 {
1724   Mat_SeqBAIJ  *a;
1725   Mat          B;
1726   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1727   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1728   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1729   int          *masked, nmask,tmp,bs2,ishift;
1730   Scalar       *aa;
1731   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1732 
1733   PetscFunctionBegin;
1734   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1735   bs2  = bs*bs;
1736 
1737   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1738   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor");
1739   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1740   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1741   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object");
1742   M = header[1]; N = header[2]; nz = header[3];
1743 
1744   if (header[3] < 0) {
1745     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ");
1746   }
1747 
1748   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
1749 
1750   /*
1751      This code adds extra rows to make sure the number of rows is
1752     divisible by the blocksize
1753   */
1754   mbs        = M/bs;
1755   extra_rows = bs - M + bs*(mbs);
1756   if (extra_rows == bs) extra_rows = 0;
1757   else                  mbs++;
1758   if (extra_rows) {
1759     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1760   }
1761 
1762   /* read in row lengths */
1763   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1764   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1765   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1766 
1767   /* read in column indices */
1768   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) );CHKPTRQ(jj);
1769   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1770   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1771 
1772   /* loop over row lengths determining block row lengths */
1773   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1774   ierr        = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1775   mask        = (int *) PetscMalloc( 2*mbs*sizeof(int) );CHKPTRQ(mask);
1776   ierr        = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1777   masked      = mask + mbs;
1778   rowcount    = 0; nzcount = 0;
1779   for ( i=0; i<mbs; i++ ) {
1780     nmask = 0;
1781     for ( j=0; j<bs; j++ ) {
1782       kmax = rowlengths[rowcount];
1783       for ( k=0; k<kmax; k++ ) {
1784         tmp = jj[nzcount++]/bs;
1785         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1786       }
1787       rowcount++;
1788     }
1789     browlengths[i] += nmask;
1790     /* zero out the mask elements we set */
1791     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1792   }
1793 
1794   /* create our matrix */
1795   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1796   B = *A;
1797   a = (Mat_SeqBAIJ *) B->data;
1798 
1799   /* set matrix "i" values */
1800   a->i[0] = 0;
1801   for ( i=1; i<= mbs; i++ ) {
1802     a->i[i]      = a->i[i-1] + browlengths[i-1];
1803     a->ilen[i-1] = browlengths[i-1];
1804   }
1805   a->nz         = 0;
1806   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1807 
1808   /* read in nonzero values */
1809   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1810   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1811   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1812 
1813   /* set "a" and "j" values into matrix */
1814   nzcount = 0; jcount = 0;
1815   for ( i=0; i<mbs; i++ ) {
1816     nzcountb = nzcount;
1817     nmask    = 0;
1818     for ( j=0; j<bs; j++ ) {
1819       kmax = rowlengths[i*bs+j];
1820       for ( k=0; k<kmax; k++ ) {
1821         tmp = jj[nzcount++]/bs;
1822 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1823       }
1824       rowcount++;
1825     }
1826     /* sort the masked values */
1827     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1828 
1829     /* set "j" values into matrix */
1830     maskcount = 1;
1831     for ( j=0; j<nmask; j++ ) {
1832       a->j[jcount++]  = masked[j];
1833       mask[masked[j]] = maskcount++;
1834     }
1835     /* set "a" values into matrix */
1836     ishift = bs2*a->i[i];
1837     for ( j=0; j<bs; j++ ) {
1838       kmax = rowlengths[i*bs+j];
1839       for ( k=0; k<kmax; k++ ) {
1840         tmp       = jj[nzcountb]/bs ;
1841         block     = mask[tmp] - 1;
1842         point     = jj[nzcountb] - bs*tmp;
1843         idx       = ishift + bs2*block + j + bs*point;
1844         a->a[idx] = aa[nzcountb++];
1845       }
1846     }
1847     /* zero out the mask elements we set */
1848     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1849   }
1850   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix");
1851 
1852   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1853   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1854   ierr = PetscFree(aa);CHKERRQ(ierr);
1855   ierr = PetscFree(jj);CHKERRQ(ierr);
1856   ierr = PetscFree(mask);CHKERRQ(ierr);
1857 
1858   B->assembled = PETSC_TRUE;
1859 
1860   ierr = MatView_Private(B);CHKERRQ(ierr);
1861   PetscFunctionReturn(0);
1862 }
1863 
1864 
1865 
1866