xref: /petsc/src/mat/impls/baij/seq/baij.c (revision f1af5d2ffeae1f5fc391a89939f4818e47770ae3)
1 /*$Id: baij.c,v 1.187 1999/10/24 14:02:28 bsmith Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "sys.h"
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNC__
18 #define __FUNC__ "MatMissingDiag_SeqBAIJ"
19 int MatMissingDiag_SeqBAIJ(Mat A)
20 {
21   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
22   int         *diag = a->diag, *jj = a->j,i;
23 
24   PetscFunctionBegin;
25   for ( i=0; i<a->mbs; i++ ) {
26     if (jj[diag[i]] != i) {
27       SETERRQ1(1,1,"Matrix is missing diagonal number %d",i);
28     }
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNC__
34 #define __FUNC__ "MatMarkDiag_SeqBAIJ"
35 int MatMarkDiag_SeqBAIJ(Mat A)
36 {
37   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
38   int         i,j, *diag, m = a->mbs;
39 
40   PetscFunctionBegin;
41   diag = (int *) PetscMalloc( (m+1)*sizeof(int));CHKPTRQ(diag);
42   PLogObjectMemory(A,(m+1)*sizeof(int));
43   for ( i=0; i<m; i++ ) {
44     diag[i] = a->i[i+1];
45     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
46       if (a->j[j] == i) {
47         diag[i] = j;
48         break;
49       }
50     }
51   }
52   a->diag = diag;
53   PetscFunctionReturn(0);
54 }
55 
56 
57 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
58 
59 #undef __FUNC__
60 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
61 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
62                             PetscTruth *done)
63 {
64   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
65   int         ierr, n = a->mbs,i;
66 
67   PetscFunctionBegin;
68   *nn = n;
69   if (!ia) PetscFunctionReturn(0);
70   if (symmetric) {
71     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
72   } else if (oshift == 1) {
73     /* temporarily add 1 to i and j indices */
74     int nz = a->i[n] + 1;
75     for ( i=0; i<nz; i++ ) a->j[i]++;
76     for ( i=0; i<n+1; i++ ) a->i[i]++;
77     *ia = a->i; *ja = a->j;
78   } else {
79     *ia = a->i; *ja = a->j;
80   }
81 
82   PetscFunctionReturn(0);
83 }
84 
85 #undef __FUNC__
86 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
87 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
88                                 PetscTruth *done)
89 {
90   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
91   int         i,n = a->mbs,ierr;
92 
93   PetscFunctionBegin;
94   if (!ia) PetscFunctionReturn(0);
95   if (symmetric) {
96     ierr = PetscFree(*ia);CHKERRQ(ierr);
97     ierr = PetscFree(*ja);CHKERRQ(ierr);
98   } else if (oshift == 1) {
99     int nz = a->i[n];
100     for ( i=0; i<nz; i++ ) a->j[i]--;
101     for ( i=0; i<n+1; i++ ) a->i[i]--;
102   }
103   PetscFunctionReturn(0);
104 }
105 
106 #undef __FUNC__
107 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
108 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
109 {
110   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
111 
112   PetscFunctionBegin;
113   *bs = baij->bs;
114   PetscFunctionReturn(0);
115 }
116 
117 
118 #undef __FUNC__
119 #define __FUNC__ "MatDestroy_SeqBAIJ"
120 int MatDestroy_SeqBAIJ(Mat A)
121 {
122   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
123   int         ierr;
124 
125   PetscFunctionBegin;
126   if (A->mapping) {
127     ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr);
128   }
129   if (A->bmapping) {
130     ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr);
131   }
132   if (A->rmap) {
133     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
134   }
135   if (A->cmap) {
136     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
137   }
138 #if defined(PETSC_USE_LOG)
139   PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
140 #endif
141   ierr = PetscFree(a->a);CHKERRQ(ierr);
142   if (!a->singlemalloc) {
143     ierr = PetscFree(a->i);CHKERRQ(ierr);
144     ierr = PetscFree(a->j);CHKERRQ(ierr);
145   }
146   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
147   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
148   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
149   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
150   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
151   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
152   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
153   ierr = PetscFree(a);CHKERRQ(ierr);
154   PLogObjectDestroy(A);
155   PetscHeaderDestroy(A);
156   PetscFunctionReturn(0);
157 }
158 
159 #undef __FUNC__
160 #define __FUNC__ "MatSetOption_SeqBAIJ"
161 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
162 {
163   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
164 
165   PetscFunctionBegin;
166   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
167   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
168   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
169   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
170   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
171   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew       = -1;
172   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew       = -2;
173   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
174   else if (op == MAT_ROWS_SORTED ||
175            op == MAT_ROWS_UNSORTED ||
176            op == MAT_SYMMETRIC ||
177            op == MAT_STRUCTURALLY_SYMMETRIC ||
178            op == MAT_YES_NEW_DIAGONALS ||
179            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
180            op == MAT_USE_HASH_TABLE) {
181     PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
182   } else if (op == MAT_NO_NEW_DIAGONALS) {
183     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
184   } else {
185     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
186   }
187   PetscFunctionReturn(0);
188 }
189 
190 
191 #undef __FUNC__
192 #define __FUNC__ "MatGetSize_SeqBAIJ"
193 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
194 {
195   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
196 
197   PetscFunctionBegin;
198   if (m) *m = a->m;
199   if (n) *n = a->n;
200   PetscFunctionReturn(0);
201 }
202 
203 #undef __FUNC__
204 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
205 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
206 {
207   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
208 
209   PetscFunctionBegin;
210   *m = 0; *n = a->m;
211   PetscFunctionReturn(0);
212 }
213 
214 #undef __FUNC__
215 #define __FUNC__ "MatGetRow_SeqBAIJ"
216 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
217 {
218   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ *) A->data;
219   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
220   MatScalar    *aa,*aa_i;
221   Scalar       *v_i;
222 
223   PetscFunctionBegin;
224   bs  = a->bs;
225   ai  = a->i;
226   aj  = a->j;
227   aa  = a->a;
228   bs2 = a->bs2;
229 
230   if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range");
231 
232   bn  = row/bs;   /* Block number */
233   bp  = row % bs; /* Block Position */
234   M   = ai[bn+1] - ai[bn];
235   *nz = bs*M;
236 
237   if (v) {
238     *v = 0;
239     if (*nz) {
240       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) );CHKPTRQ(*v);
241       for ( i=0; i<M; i++ ) { /* for each block in the block row */
242         v_i  = *v + i*bs;
243         aa_i = aa + bs2*(ai[bn] + i);
244         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
245       }
246     }
247   }
248 
249   if (idx) {
250     *idx = 0;
251     if (*nz) {
252       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) );CHKPTRQ(*idx);
253       for ( i=0; i<M; i++ ) { /* for each block in the block row */
254         idx_i = *idx + i*bs;
255         itmp  = bs*aj[ai[bn] + i];
256         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
257       }
258     }
259   }
260   PetscFunctionReturn(0);
261 }
262 
263 #undef __FUNC__
264 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
265 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
266 {
267   int ierr;
268 
269   PetscFunctionBegin;
270   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
271   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
272   PetscFunctionReturn(0);
273 }
274 
275 #undef __FUNC__
276 #define __FUNC__ "MatTranspose_SeqBAIJ"
277 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
278 {
279   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
280   Mat         C;
281   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
282   int         *rows,*cols,bs2=a->bs2;
283   MatScalar   *array = a->a;
284 
285   PetscFunctionBegin;
286   if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place");
287   col  = (int *) PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col);
288   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
289 
290   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
291   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
292   ierr = PetscFree(col);CHKERRQ(ierr);
293   rows = (int *) PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows);
294   cols = rows + bs;
295   for ( i=0; i<mbs; i++ ) {
296     cols[0] = i*bs;
297     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
298     len = ai[i+1] - ai[i];
299     for ( j=0; j<len; j++ ) {
300       rows[0] = (*aj++)*bs;
301       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
302       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
303       array += bs2;
304     }
305   }
306   ierr = PetscFree(rows);CHKERRQ(ierr);
307 
308   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
309   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
310 
311   if (B != PETSC_NULL) {
312     *B = C;
313   } else {
314     PetscOps *Abops;
315     MatOps   Aops;
316 
317     /* This isn't really an in-place transpose */
318     ierr = PetscFree(a->a);CHKERRQ(ierr);
319     if (!a->singlemalloc) {
320       ierr = PetscFree(a->i);CHKERRQ(ierr);
321       ierr = PetscFree(a->j);CHKERRQ(ierr);
322     }
323     if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
324     if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
325     if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
326     if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
327     ierr = PetscFree(a);CHKERRQ(ierr);
328 
329 
330     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
331     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
332 
333     /*
334        This is horrible, horrible code. We need to keep the
335       A pointers for the bops and ops but copy everything
336       else from C.
337     */
338     Abops    = A->bops;
339     Aops     = A->ops;
340     ierr     = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr);
341     A->bops  = Abops;
342     A->ops   = Aops;
343     A->qlist = 0;
344 
345     PetscHeaderDestroy(C);
346   }
347   PetscFunctionReturn(0);
348 }
349 
350 #undef __FUNC__
351 #define __FUNC__ "MatView_SeqBAIJ_Binary"
352 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
353 {
354   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
355   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
356   Scalar      *aa;
357   FILE        *file;
358 
359   PetscFunctionBegin;
360   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
361   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
362   col_lens[0] = MAT_COOKIE;
363 
364   col_lens[1] = a->m;
365   col_lens[2] = a->n;
366   col_lens[3] = a->nz*bs2;
367 
368   /* store lengths of each row and write (including header) to file */
369   count = 0;
370   for ( i=0; i<a->mbs; i++ ) {
371     for ( j=0; j<bs; j++ ) {
372       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
373     }
374   }
375   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr);
376   ierr = PetscFree(col_lens);CHKERRQ(ierr);
377 
378   /* store column indices (zero start index) */
379   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) );CHKPTRQ(jj);
380   count = 0;
381   for ( i=0; i<a->mbs; i++ ) {
382     for ( j=0; j<bs; j++ ) {
383       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
384         for ( l=0; l<bs; l++ ) {
385           jj[count++] = bs*a->j[k] + l;
386         }
387       }
388     }
389   }
390   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
391   ierr = PetscFree(jj);CHKERRQ(ierr);
392 
393   /* store nonzero values */
394   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa);
395   count = 0;
396   for ( i=0; i<a->mbs; i++ ) {
397     for ( j=0; j<bs; j++ ) {
398       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
399         for ( l=0; l<bs; l++ ) {
400           aa[count++] = a->a[bs2*k + l*bs + j];
401         }
402       }
403     }
404   }
405   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
406   ierr = PetscFree(aa);CHKERRQ(ierr);
407 
408   ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
409   if (file) {
410     fprintf(file,"-matload_block_size %d\n",a->bs);
411   }
412   PetscFunctionReturn(0);
413 }
414 
415 #undef __FUNC__
416 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
417 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
418 {
419   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
420   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
421   char        *outputname;
422 
423   PetscFunctionBegin;
424   ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr);
425   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
426   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
427     ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
428   } else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
429     SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported");
430   } else if (format == VIEWER_FORMAT_ASCII_COMMON) {
431     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
432     for ( i=0; i<a->mbs; i++ ) {
433       for ( j=0; j<bs; j++ ) {
434         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
435         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
436           for ( l=0; l<bs; l++ ) {
437 #if defined(PETSC_USE_COMPLEX)
438             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
439               ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
440                       PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
441             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
442               ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
443                       PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
444             } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
445               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
446             }
447 #else
448             if (a->a[bs2*k + l*bs + j] != 0.0) {
449               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
450             }
451 #endif
452           }
453         }
454         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
455       }
456     }
457     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
458   } else {
459     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
460     for ( i=0; i<a->mbs; i++ ) {
461       for ( j=0; j<bs; j++ ) {
462         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
463         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
464           for ( l=0; l<bs; l++ ) {
465 #if defined(PETSC_USE_COMPLEX)
466             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) {
467               ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
468                 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
469             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) {
470               ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
471                 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
472             } else {
473               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
474             }
475 #else
476             ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
477 #endif
478           }
479         }
480         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
481       }
482     }
483     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
484   }
485   ierr = ViewerFlush(viewer);CHKERRQ(ierr);
486   PetscFunctionReturn(0);
487 }
488 
489 #undef __FUNC__
490 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom"
491 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa)
492 {
493   Mat          A = (Mat) Aa;
494   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
495   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
496   double       xl,yl,xr,yr,x_l,x_r,y_l,y_r;
497   MatScalar    *aa;
498   MPI_Comm     comm;
499   Viewer       viewer;
500 
501   PetscFunctionBegin;
502   /*
503       This is nasty. If this is called from an originally parallel matrix
504    then all processes call this, but only the first has the matrix so the
505    rest should return immediately.
506   */
507   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
508   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
509   if (rank) PetscFunctionReturn(0);
510 
511   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr);
512 
513   ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
514 
515   /* loop over matrix elements drawing boxes */
516   color = DRAW_BLUE;
517   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
518     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
519       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
520       x_l = a->j[j]*bs; x_r = x_l + 1.0;
521       aa = a->a + j*bs2;
522       for ( k=0; k<bs; k++ ) {
523         for ( l=0; l<bs; l++ ) {
524           if (PetscReal(*aa++) >=  0.) continue;
525           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
526         }
527       }
528     }
529   }
530   color = DRAW_CYAN;
531   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
532     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
533       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
534       x_l = a->j[j]*bs; x_r = x_l + 1.0;
535       aa = a->a + j*bs2;
536       for ( k=0; k<bs; k++ ) {
537         for ( l=0; l<bs; l++ ) {
538           if (PetscReal(*aa++) != 0.) continue;
539           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
540         }
541       }
542     }
543   }
544 
545   color = DRAW_RED;
546   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
547     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
548       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
549       x_l = a->j[j]*bs; x_r = x_l + 1.0;
550       aa = a->a + j*bs2;
551       for ( k=0; k<bs; k++ ) {
552         for ( l=0; l<bs; l++ ) {
553           if (PetscReal(*aa++) <= 0.) continue;
554           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
555         }
556       }
557     }
558   }
559   PetscFunctionReturn(0);
560 }
561 
562 #undef __FUNC__
563 #define __FUNC__ "MatView_SeqBAIJ_Draw"
564 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
565 {
566   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
567   int          ierr;
568   double       xl,yl,xr,yr,w,h;
569   Draw         draw;
570   PetscTruth   isnull;
571 
572   PetscFunctionBegin;
573 
574   ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
575   ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
576 
577   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
578   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
579   xr += w;    yr += h;  xl = -w;     yl = -h;
580   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
581   ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
582   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
583   PetscFunctionReturn(0);
584 }
585 
586 #undef __FUNC__
587 #define __FUNC__ "MatView_SeqBAIJ"
588 int MatView_SeqBAIJ(Mat A,Viewer viewer)
589 {
590   int        ierr;
591   PetscTruth issocket,isascii,isbinary,isdraw;
592 
593   PetscFunctionBegin;
594   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
595   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
596   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
597   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
598   if (issocket) {
599     SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported");
600   } else if (isascii){
601     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
602   } else if (isbinary) {
603     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
604   } else if (isdraw) {
605     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
606   } else {
607     SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
608   }
609   PetscFunctionReturn(0);
610 }
611 
612 
613 #undef __FUNC__
614 #define __FUNC__ "MatGetValues_SeqBAIJ"
615 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
616 {
617   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
618   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
619   int        *ai = a->i, *ailen = a->ilen;
620   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
621   MatScalar  *ap, *aa = a->a, zero = 0.0;
622 
623   PetscFunctionBegin;
624   for ( k=0; k<m; k++ ) { /* loop over rows */
625     row  = im[k]; brow = row/bs;
626     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
627     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
628     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
629     nrow = ailen[brow];
630     for ( l=0; l<n; l++ ) { /* loop over columns */
631       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
632       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
633       col  = in[l] ;
634       bcol = col/bs;
635       cidx = col%bs;
636       ridx = row%bs;
637       high = nrow;
638       low  = 0; /* assume unsorted */
639       while (high-low > 5) {
640         t = (low+high)/2;
641         if (rp[t] > bcol) high = t;
642         else             low  = t;
643       }
644       for ( i=low; i<high; i++ ) {
645         if (rp[i] > bcol) break;
646         if (rp[i] == bcol) {
647           *v++ = ap[bs2*i+bs*cidx+ridx];
648           goto finished;
649         }
650       }
651       *v++ = zero;
652       finished:;
653     }
654   }
655   PetscFunctionReturn(0);
656 }
657 
658 
659 #undef __FUNC__
660 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
661 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
662 {
663   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
664   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
665   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented;
666   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
667   Scalar      *value = v;
668   MatScalar   *ap,*aa=a->a,*bap;
669 
670   PetscFunctionBegin;
671   if (roworiented) {
672     stepval = (n-1)*bs;
673   } else {
674     stepval = (m-1)*bs;
675   }
676   for ( k=0; k<m; k++ ) { /* loop over added rows */
677     row  = im[k];
678     if (row < 0) continue;
679 #if defined(PETSC_USE_BOPT_g)
680     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
681 #endif
682     rp   = aj + ai[row];
683     ap   = aa + bs2*ai[row];
684     rmax = imax[row];
685     nrow = ailen[row];
686     low  = 0;
687     for ( l=0; l<n; l++ ) { /* loop over added columns */
688       if (in[l] < 0) continue;
689 #if defined(PETSC_USE_BOPT_g)
690       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
691 #endif
692       col = in[l];
693       if (roworiented) {
694         value = v + k*(stepval+bs)*bs + l*bs;
695       } else {
696         value = v + l*(stepval+bs)*bs + k*bs;
697       }
698       if (!sorted) low = 0; high = nrow;
699       while (high-low > 7) {
700         t = (low+high)/2;
701         if (rp[t] > col) high = t;
702         else             low  = t;
703       }
704       for ( i=low; i<high; i++ ) {
705         if (rp[i] > col) break;
706         if (rp[i] == col) {
707           bap  = ap +  bs2*i;
708           if (roworiented) {
709             if (is == ADD_VALUES) {
710               for ( ii=0; ii<bs; ii++,value+=stepval ) {
711                 for (jj=ii; jj<bs2; jj+=bs ) {
712                   bap[jj] += *value++;
713                 }
714               }
715             } else {
716               for ( ii=0; ii<bs; ii++,value+=stepval ) {
717                 for (jj=ii; jj<bs2; jj+=bs ) {
718                   bap[jj] = *value++;
719                 }
720               }
721             }
722           } else {
723             if (is == ADD_VALUES) {
724               for ( ii=0; ii<bs; ii++,value+=stepval ) {
725                 for (jj=0; jj<bs; jj++ ) {
726                   *bap++ += *value++;
727                 }
728               }
729             } else {
730               for ( ii=0; ii<bs; ii++,value+=stepval ) {
731                 for (jj=0; jj<bs; jj++ ) {
732                   *bap++  = *value++;
733                 }
734               }
735             }
736           }
737           goto noinsert2;
738         }
739       }
740       if (nonew == 1) goto noinsert2;
741       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
742       if (nrow >= rmax) {
743         /* there is no extra room in row, therefore enlarge */
744         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
745         MatScalar *new_a;
746 
747         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
748 
749         /* malloc new storage space */
750         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
751         new_a   = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a);
752         new_j   = (int *) (new_a + bs2*new_nz);
753         new_i   = new_j + new_nz;
754 
755         /* copy over old data into new slots */
756         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
757         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
758         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
759         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
760         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
761         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
762         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
763         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
764         /* free up old matrix storage */
765         ierr = PetscFree(a->a);CHKERRQ(ierr);
766         if (!a->singlemalloc) {
767           ierr = PetscFree(a->i);CHKERRQ(ierr);
768           ierr = PetscFree(a->j);CHKERRQ(ierr);
769         }
770         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
771         a->singlemalloc = 1;
772 
773         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
774         rmax = imax[row] = imax[row] + CHUNKSIZE;
775         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
776         a->maxnz += bs2*CHUNKSIZE;
777         a->reallocs++;
778         a->nz++;
779       }
780       N = nrow++ - 1;
781       /* shift up all the later entries in this row */
782       for ( ii=N; ii>=i; ii-- ) {
783         rp[ii+1] = rp[ii];
784         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
785       }
786       if (N >= i) {
787         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
788       }
789       rp[i] = col;
790       bap   = ap +  bs2*i;
791       if (roworiented) {
792         for ( ii=0; ii<bs; ii++,value+=stepval) {
793           for (jj=ii; jj<bs2; jj+=bs ) {
794             bap[jj] = *value++;
795           }
796         }
797       } else {
798         for ( ii=0; ii<bs; ii++,value+=stepval ) {
799           for (jj=0; jj<bs; jj++ ) {
800             *bap++  = *value++;
801           }
802         }
803       }
804       noinsert2:;
805       low = i;
806     }
807     ailen[row] = nrow;
808   }
809   PetscFunctionReturn(0);
810 }
811 
812 
813 #undef __FUNC__
814 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
815 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
816 {
817   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
818   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
819   int        m = a->m,*ip, N, *ailen = a->ilen;
820   int        mbs = a->mbs, bs2 = a->bs2,rmax = 0,ierr;
821   MatScalar  *aa = a->a, *ap;
822 
823   PetscFunctionBegin;
824   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
825 
826   if (m) rmax = ailen[0];
827   for ( i=1; i<mbs; i++ ) {
828     /* move each row back by the amount of empty slots (fshift) before it*/
829     fshift += imax[i-1] - ailen[i-1];
830     rmax   = PetscMax(rmax,ailen[i]);
831     if (fshift) {
832       ip = aj + ai[i]; ap = aa + bs2*ai[i];
833       N = ailen[i];
834       for ( j=0; j<N; j++ ) {
835         ip[j-fshift] = ip[j];
836         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
837       }
838     }
839     ai[i] = ai[i-1] + ailen[i-1];
840   }
841   if (mbs) {
842     fshift += imax[mbs-1] - ailen[mbs-1];
843     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
844   }
845   /* reset ilen and imax for each row */
846   for ( i=0; i<mbs; i++ ) {
847     ailen[i] = imax[i] = ai[i+1] - ai[i];
848   }
849   a->nz = ai[mbs];
850 
851   /* diagonals may have moved, so kill the diagonal pointers */
852   if (fshift && a->diag) {
853     ierr = PetscFree(a->diag);CHKERRQ(ierr);
854     PLogObjectMemory(A,-(m+1)*sizeof(int));
855     a->diag = 0;
856   }
857   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
858            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
859   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
860            a->reallocs);
861   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
862   a->reallocs          = 0;
863   A->info.nz_unneeded  = (double)fshift*bs2;
864 
865   PetscFunctionReturn(0);
866 }
867 
868 
869 
870 /*
871    This function returns an array of flags which indicate the locations of contiguous
872    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
873    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
874    Assume: sizes should be long enough to hold all the values.
875 */
876 #undef __FUNC__
877 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks"
878 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
879 {
880   int        i,j,k,row;
881   PetscTruth flg;
882 
883   PetscFunctionBegin;
884   for ( i=0,j=0; i<n; j++ ) {
885     row = idx[i];
886     if (row%bs!=0) { /* Not the begining of a block */
887       sizes[j] = 1;
888       i++;
889     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
890       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
891       i++;
892     } else { /* Begining of the block, so check if the complete block exists */
893       flg = PETSC_TRUE;
894       for ( k=1; k<bs; k++ ) {
895         if (row+k != idx[i+k]) { /* break in the block */
896           flg = PETSC_FALSE;
897           break;
898         }
899       }
900       if (flg == PETSC_TRUE) { /* No break in the bs */
901         sizes[j] = bs;
902         i+= bs;
903       } else {
904         sizes[j] = 1;
905         i++;
906       }
907     }
908   }
909   *bs_max = j;
910   PetscFunctionReturn(0);
911 }
912 
913 #undef __FUNC__
914 #define __FUNC__ "MatZeroRows_SeqBAIJ"
915 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
916 {
917   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
918   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
919   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
920   Scalar      zero = 0.0;
921   MatScalar   *aa;
922 
923   PetscFunctionBegin;
924   /* Make a copy of the IS and  sort it */
925   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
926   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
927 
928   /* allocate memory for rows,sizes */
929   rows  = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows);
930   sizes = rows + is_n;
931 
932   /* initialize copy IS valurs to rows, and sort them */
933   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
934   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
935   ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
936   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
937 
938   for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) {
939     row   = rows[j];
940     if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row);
941     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
942     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
943     if (sizes[i] == bs) {
944       if (diag) {
945         if (baij->ilen[row/bs] > 0) {
946           baij->ilen[row/bs] = 1;
947           baij->j[baij->i[row/bs]] = row/bs;
948           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
949         }
950         /* Now insert all the diagoanl values for this bs */
951         for ( k=0; k<bs; k++ ) {
952           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
953         }
954       } else { /* (!diag) */
955         baij->ilen[row/bs] = 0;
956       } /* end (!diag) */
957     } else { /* (sizes[i] != bs) */
958 #if defined (PETSC_USE_DEBUG)
959       if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1");
960 #endif
961       for ( k=0; k<count; k++ ) {
962         aa[0] = zero;
963         aa+=bs;
964       }
965       if (diag) {
966         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
967       }
968     }
969   }
970 
971   ierr = PetscFree(rows);CHKERRQ(ierr);
972   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
973   PetscFunctionReturn(0);
974 }
975 
976 #undef __FUNC__
977 #define __FUNC__ "MatSetValues_SeqBAIJ"
978 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
979 {
980   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
981   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
982   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
983   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
984   int         ridx,cidx,bs2=a->bs2,ierr;
985   MatScalar   *ap,value,*aa=a->a,*bap;
986 
987   PetscFunctionBegin;
988   for ( k=0; k<m; k++ ) { /* loop over added rows */
989     row  = im[k]; brow = row/bs;
990     if (row < 0) continue;
991 #if defined(PETSC_USE_BOPT_g)
992     if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m);
993 #endif
994     rp   = aj + ai[brow];
995     ap   = aa + bs2*ai[brow];
996     rmax = imax[brow];
997     nrow = ailen[brow];
998     low  = 0;
999     for ( l=0; l<n; l++ ) { /* loop over added columns */
1000       if (in[l] < 0) continue;
1001 #if defined(PETSC_USE_BOPT_g)
1002       if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n);
1003 #endif
1004       col = in[l]; bcol = col/bs;
1005       ridx = row % bs; cidx = col % bs;
1006       if (roworiented) {
1007         value = v[l + k*n];
1008       } else {
1009         value = v[k + l*m];
1010       }
1011       if (!sorted) low = 0; high = nrow;
1012       while (high-low > 7) {
1013         t = (low+high)/2;
1014         if (rp[t] > bcol) high = t;
1015         else              low  = t;
1016       }
1017       for ( i=low; i<high; i++ ) {
1018         if (rp[i] > bcol) break;
1019         if (rp[i] == bcol) {
1020           bap  = ap +  bs2*i + bs*cidx + ridx;
1021           if (is == ADD_VALUES) *bap += value;
1022           else                  *bap  = value;
1023           goto noinsert1;
1024         }
1025       }
1026       if (nonew == 1) goto noinsert1;
1027       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1028       if (nrow >= rmax) {
1029         /* there is no extra room in row, therefore enlarge */
1030         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1031         MatScalar *new_a;
1032 
1033         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1034 
1035         /* Malloc new storage space */
1036         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1037         new_a   = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a);
1038         new_j   = (int *) (new_a + bs2*new_nz);
1039         new_i   = new_j + new_nz;
1040 
1041         /* copy over old data into new slots */
1042         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
1043         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1044         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1045         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1046         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1047         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1048         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1049         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1050         /* free up old matrix storage */
1051         ierr = PetscFree(a->a);CHKERRQ(ierr);
1052         if (!a->singlemalloc) {
1053           ierr = PetscFree(a->i);CHKERRQ(ierr);
1054           ierr = PetscFree(a->j);CHKERRQ(ierr);
1055         }
1056         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1057         a->singlemalloc = 1;
1058 
1059         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1060         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1061         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1062         a->maxnz += bs2*CHUNKSIZE;
1063         a->reallocs++;
1064         a->nz++;
1065       }
1066       N = nrow++ - 1;
1067       /* shift up all the later entries in this row */
1068       for ( ii=N; ii>=i; ii-- ) {
1069         rp[ii+1] = rp[ii];
1070         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1071       }
1072       if (N>=i) {
1073         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1074       }
1075       rp[i]                      = bcol;
1076       ap[bs2*i + bs*cidx + ridx] = value;
1077       noinsert1:;
1078       low = i;
1079     }
1080     ailen[brow] = nrow;
1081   }
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1086 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1087 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1088 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1089 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1090 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec);
1091 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1092 extern int MatScale_SeqBAIJ(Scalar*,Mat);
1093 extern int MatNorm_SeqBAIJ(Mat,NormType,double *);
1094 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*);
1095 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1096 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1097 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1098 extern int MatZeroEntries_SeqBAIJ(Mat);
1099 
1100 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1101 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1102 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1103 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1104 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1105 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1106 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
1107 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1108 extern int MatSolveTrans_SeqBAIJ_7(Mat,Vec,Vec);
1109 extern int MatSolveTrans_SeqBAIJ_6(Mat,Vec,Vec);
1110 extern int MatSolveTrans_SeqBAIJ_5(Mat,Vec,Vec);
1111 extern int MatSolveTrans_SeqBAIJ_4(Mat,Vec,Vec);
1112 extern int MatSolveTrans_SeqBAIJ_3(Mat,Vec,Vec);
1113 extern int MatSolveTrans_SeqBAIJ_2(Mat,Vec,Vec);
1114 extern int MatSolveTrans_SeqBAIJ_1(Mat,Vec,Vec);
1115 
1116 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1117 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1118 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1119 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1120 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1121 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1122 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*);
1123 
1124 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1125 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1126 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1127 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1128 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1129 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec);
1130 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1131 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1132 
1133 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1134 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1135 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1136 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1137 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1138 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
1139 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1140 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1141 
1142 #undef __FUNC__
1143 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1144 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1145 {
1146   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1147   Mat         outA;
1148   int         ierr;
1149   PetscTruth  row_identity, col_identity;
1150 
1151   PetscFunctionBegin;
1152   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU");
1153   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1154   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1155   if (!row_identity || !col_identity) {
1156     SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU");
1157   }
1158 
1159   outA          = inA;
1160   inA->factor   = FACTOR_LU;
1161   a->row        = row;
1162   a->col        = col;
1163 
1164   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1165   ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr);
1166   PLogObjectParent(inA,a->icol);
1167 
1168   if (!a->solve_work) {
1169     a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1170     PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1171   }
1172 
1173   if (!a->diag) {
1174     ierr = MatMarkDiag_SeqBAIJ(inA);CHKERRQ(ierr);
1175   }
1176   /*
1177       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1178       for ILU(0) factorization with natural ordering
1179   */
1180   switch (a->bs) {
1181   case 1:
1182     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_2_NaturalOrdering;
1183     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1184   case 2:
1185     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1186     inA->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1187     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_2_NaturalOrdering;
1188     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1189     break;
1190   case 3:
1191     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1192     inA->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1193     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_3_NaturalOrdering;
1194     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1195     break;
1196   case 4:
1197     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1198     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1199     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_4_NaturalOrdering;
1200     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1201     break;
1202   case 5:
1203     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1204     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1205     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_5_NaturalOrdering;
1206     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1207     break;
1208   case 6:
1209     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1210     inA->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1211     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_6_NaturalOrdering;
1212     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1213     break;
1214   case 7:
1215     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1216     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_7_NaturalOrdering;
1217     inA->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1218     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1219     break;
1220   }
1221 
1222   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1223 
1224   PetscFunctionReturn(0);
1225 }
1226 #undef __FUNC__
1227 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1228 int MatPrintHelp_SeqBAIJ(Mat A)
1229 {
1230   static int called = 0;
1231   MPI_Comm   comm = A->comm;
1232   int        ierr;
1233 
1234   PetscFunctionBegin;
1235   if (called) {PetscFunctionReturn(0);} else called = 1;
1236   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1237   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 EXTERN_C_BEGIN
1242 #undef __FUNC__
1243 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1244 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1245 {
1246   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1247   int         i,nz,n;
1248 
1249   PetscFunctionBegin;
1250   nz = baij->maxnz;
1251   n  = baij->n;
1252   for (i=0; i<nz; i++) {
1253     baij->j[i] = indices[i];
1254   }
1255   baij->nz = nz;
1256   for ( i=0; i<n; i++ ) {
1257     baij->ilen[i] = baij->imax[i];
1258   }
1259 
1260   PetscFunctionReturn(0);
1261 }
1262 EXTERN_C_END
1263 
1264 #undef __FUNC__
1265 #define __FUNC__ "MatSeqBAIJSetColumnIndices"
1266 /*@
1267     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1268        in the matrix.
1269 
1270   Input Parameters:
1271 +  mat - the SeqBAIJ matrix
1272 -  indices - the column indices
1273 
1274   Level: advanced
1275 
1276   Notes:
1277     This can be called if you have precomputed the nonzero structure of the
1278   matrix and want to provide it to the matrix object to improve the performance
1279   of the MatSetValues() operation.
1280 
1281     You MUST have set the correct numbers of nonzeros per row in the call to
1282   MatCreateSeqBAIJ().
1283 
1284     MUST be called before any calls to MatSetValues();
1285 
1286 @*/
1287 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1288 {
1289   int ierr,(*f)(Mat,int *);
1290 
1291   PetscFunctionBegin;
1292   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1293   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr);
1294   if (f) {
1295     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1296   } else {
1297     SETERRQ(1,1,"Wrong type of matrix to set column indices");
1298   }
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 /* -------------------------------------------------------------------*/
1303 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1304        MatGetRow_SeqBAIJ,
1305        MatRestoreRow_SeqBAIJ,
1306        MatMult_SeqBAIJ_N,
1307        MatMultAdd_SeqBAIJ_N,
1308        MatMultTrans_SeqBAIJ,
1309        MatMultTransAdd_SeqBAIJ,
1310        MatSolve_SeqBAIJ_N,
1311        0,
1312        0,
1313        0,
1314        MatLUFactor_SeqBAIJ,
1315        0,
1316        0,
1317        MatTranspose_SeqBAIJ,
1318        MatGetInfo_SeqBAIJ,
1319        MatEqual_SeqBAIJ,
1320        MatGetDiagonal_SeqBAIJ,
1321        MatDiagonalScale_SeqBAIJ,
1322        MatNorm_SeqBAIJ,
1323        0,
1324        MatAssemblyEnd_SeqBAIJ,
1325        0,
1326        MatSetOption_SeqBAIJ,
1327        MatZeroEntries_SeqBAIJ,
1328        MatZeroRows_SeqBAIJ,
1329        MatLUFactorSymbolic_SeqBAIJ,
1330        MatLUFactorNumeric_SeqBAIJ_N,
1331        0,
1332        0,
1333        MatGetSize_SeqBAIJ,
1334        MatGetSize_SeqBAIJ,
1335        MatGetOwnershipRange_SeqBAIJ,
1336        MatILUFactorSymbolic_SeqBAIJ,
1337        0,
1338        0,
1339        0,
1340        MatDuplicate_SeqBAIJ,
1341        0,
1342        0,
1343        MatILUFactor_SeqBAIJ,
1344        0,
1345        0,
1346        MatGetSubMatrices_SeqBAIJ,
1347        MatIncreaseOverlap_SeqBAIJ,
1348        MatGetValues_SeqBAIJ,
1349        0,
1350        MatPrintHelp_SeqBAIJ,
1351        MatScale_SeqBAIJ,
1352        0,
1353        0,
1354        0,
1355        MatGetBlockSize_SeqBAIJ,
1356        MatGetRowIJ_SeqBAIJ,
1357        MatRestoreRowIJ_SeqBAIJ,
1358        0,
1359        0,
1360        0,
1361        0,
1362        0,
1363        0,
1364        MatSetValuesBlocked_SeqBAIJ,
1365        MatGetSubMatrix_SeqBAIJ,
1366        0,
1367        0,
1368        MatGetMaps_Petsc};
1369 
1370 EXTERN_C_BEGIN
1371 #undef __FUNC__
1372 #define __FUNC__ "MatStoreValues_SeqBAIJ"
1373 int MatStoreValues_SeqBAIJ(Mat mat)
1374 {
1375   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1376   int         nz = aij->i[aij->m]*aij->bs*aij->bs2;
1377   int         ierr;
1378 
1379   PetscFunctionBegin;
1380   if (aij->nonew != 1) {
1381     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1382   }
1383 
1384   /* allocate space for values if not already there */
1385   if (!aij->saved_values) {
1386     aij->saved_values = (Scalar *) PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values);
1387   }
1388 
1389   /* copy values over */
1390   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1391   PetscFunctionReturn(0);
1392 }
1393 EXTERN_C_END
1394 
1395 EXTERN_C_BEGIN
1396 #undef __FUNC__
1397 #define __FUNC__ "MatRetrieveValues_SeqBAIJ"
1398 int MatRetrieveValues_SeqBAIJ(Mat mat)
1399 {
1400   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1401   int         nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr;
1402 
1403   PetscFunctionBegin;
1404   if (aij->nonew != 1) {
1405     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1406   }
1407   if (!aij->saved_values) {
1408     SETERRQ(1,1,"Must call MatStoreValues(A);first");
1409   }
1410 
1411   /* copy values over */
1412   ierr = PetscMemcpy(aij->a, aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 EXTERN_C_END
1416 
1417 #undef __FUNC__
1418 #define __FUNC__ "MatCreateSeqBAIJ"
1419 /*@C
1420    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1421    compressed row) format.  For good matrix assembly performance the
1422    user should preallocate the matrix storage by setting the parameter nz
1423    (or the array nnz).  By setting these parameters accurately, performance
1424    during matrix assembly can be increased by more than a factor of 50.
1425 
1426    Collective on MPI_Comm
1427 
1428    Input Parameters:
1429 +  comm - MPI communicator, set to PETSC_COMM_SELF
1430 .  bs - size of block
1431 .  m - number of rows
1432 .  n - number of columns
1433 .  nz - number of block nonzeros per block row (same for all rows)
1434 -  nnz - array containing the number of block nonzeros in the various block rows
1435          (possibly different for each block row) or PETSC_NULL
1436 
1437    Output Parameter:
1438 .  A - the matrix
1439 
1440    Options Database Keys:
1441 .   -mat_no_unroll - uses code that does not unroll the loops in the
1442                      block calculations (much slower)
1443 .    -mat_block_size - size of the blocks to use
1444 
1445    Level: intermediate
1446 
1447    Notes:
1448    The block AIJ format is fully compatible with standard Fortran 77
1449    storage.  That is, the stored row and column indices can begin at
1450    either one (as in Fortran) or zero.  See the users' manual for details.
1451 
1452    Specify the preallocated storage with either nz or nnz (not both).
1453    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1454    allocation.  For additional details, see the users manual chapter on
1455    matrices.
1456 
1457 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1458 @*/
1459 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1460 {
1461   Mat         B;
1462   Mat_SeqBAIJ *b;
1463   int         i,len,ierr,mbs,nbs,bs2,size;
1464   PetscTruth  flg;
1465 
1466   PetscFunctionBegin;
1467   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1468   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1");
1469 
1470   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1471   mbs  = m/bs;
1472   nbs  = n/bs;
1473   bs2  = bs*bs;
1474 
1475   if (mbs*bs!=m || nbs*bs!=n) {
1476     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize");
1477   }
1478 
1479   if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz);
1480   if (nnz) {
1481     for (i=0; i<mbs; i++) {
1482       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1483     }
1484   }
1485 
1486   *A = 0;
1487   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView);
1488   PLogObjectCreate(B);
1489   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b);
1490   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1491   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1492   ierr    = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1493   if (!flg) {
1494     switch (bs) {
1495     case 1:
1496       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1497       B->ops->solve           = MatSolve_SeqBAIJ_1;
1498       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_1;
1499       B->ops->mult            = MatMult_SeqBAIJ_1;
1500       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1501       break;
1502     case 2:
1503       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1504       B->ops->solve           = MatSolve_SeqBAIJ_2;
1505       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_2;
1506       B->ops->mult            = MatMult_SeqBAIJ_2;
1507       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1508       break;
1509     case 3:
1510       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1511       B->ops->solve           = MatSolve_SeqBAIJ_3;
1512       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_3;
1513       B->ops->mult            = MatMult_SeqBAIJ_3;
1514       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1515       break;
1516     case 4:
1517       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1518       B->ops->solve           = MatSolve_SeqBAIJ_4;
1519       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_4;
1520       B->ops->mult            = MatMult_SeqBAIJ_4;
1521       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1522       break;
1523     case 5:
1524       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1525       B->ops->solve           = MatSolve_SeqBAIJ_5;
1526       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_5;
1527       B->ops->mult            = MatMult_SeqBAIJ_5;
1528       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1529       break;
1530     case 6:
1531       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1532       B->ops->solve           = MatSolve_SeqBAIJ_6;
1533       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_6;
1534       B->ops->mult            = MatMult_SeqBAIJ_6;
1535       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1536       break;
1537     case 7:
1538       B->ops->mult            = MatMult_SeqBAIJ_7;
1539       B->ops->solve           = MatSolve_SeqBAIJ_7;
1540       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_7;
1541       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1542       break;
1543     }
1544   }
1545   B->ops->destroy     = MatDestroy_SeqBAIJ;
1546   B->ops->view        = MatView_SeqBAIJ;
1547   B->factor           = 0;
1548   B->lupivotthreshold = 1.0;
1549   B->mapping          = 0;
1550   b->row              = 0;
1551   b->col              = 0;
1552   b->icol             = 0;
1553   b->reallocs         = 0;
1554   b->saved_values     = 0;
1555 
1556   b->m       = m; B->m = m; B->M = m;
1557   b->n       = n; B->n = n; B->N = n;
1558 
1559   ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr);
1560   ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr);
1561 
1562   b->mbs     = mbs;
1563   b->nbs     = nbs;
1564   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(b->imax);
1565   if (nnz == PETSC_NULL) {
1566     if (nz == PETSC_DEFAULT) nz = 5;
1567     else if (nz <= 0)        nz = 1;
1568     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1569     nz = nz*mbs;
1570   } else {
1571     nz = 0;
1572     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1573   }
1574 
1575   /* allocate the matrix space */
1576   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int);
1577   b->a  = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a);
1578   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1579   b->j  = (int *) (b->a + nz*bs2);
1580   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1581   b->i  = b->j + nz;
1582   b->singlemalloc = 1;
1583 
1584   b->i[0] = 0;
1585   for (i=1; i<mbs+1; i++) {
1586     b->i[i] = b->i[i-1] + b->imax[i-1];
1587   }
1588 
1589   /* b->ilen will count nonzeros in each block row so far. */
1590   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1591   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1592   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1593 
1594   b->bs               = bs;
1595   b->bs2              = bs2;
1596   b->mbs              = mbs;
1597   b->nz               = 0;
1598   b->maxnz            = nz*bs2;
1599   b->sorted           = 0;
1600   b->roworiented      = 1;
1601   b->nonew            = 0;
1602   b->diag             = 0;
1603   b->solve_work       = 0;
1604   b->mult_work        = 0;
1605   b->spptr            = 0;
1606   B->info.nz_unneeded = (double)b->maxnz;
1607 
1608   *A = B;
1609   ierr = OptionsHasName(PETSC_NULL,"-help", &flg);CHKERRQ(ierr);
1610   if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); }
1611 
1612   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1613                                      "MatStoreValues_SeqBAIJ",
1614                                      (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1615   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1616                                      "MatRetrieveValues_SeqBAIJ",
1617                                      (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1618   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1619                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1620                                      (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 #undef __FUNC__
1625 #define __FUNC__ "MatDuplicate_SeqBAIJ"
1626 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1627 {
1628   Mat         C;
1629   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1630   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1631 
1632   PetscFunctionBegin;
1633   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix");
1634 
1635   *B = 0;
1636   PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView);
1637   PLogObjectCreate(C);
1638   C->data         = (void *) (c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c);
1639   ierr            = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1640   C->ops->destroy = MatDestroy_SeqBAIJ;
1641   C->ops->view    = MatView_SeqBAIJ;
1642   C->factor       = A->factor;
1643   c->row          = 0;
1644   c->col          = 0;
1645   c->icol         = 0;
1646   c->saved_values = 0;
1647   C->assembled    = PETSC_TRUE;
1648 
1649   c->m = C->m   = a->m;
1650   c->n = C->n   = a->n;
1651   C->M          = a->m;
1652   C->N          = a->n;
1653 
1654   c->bs         = a->bs;
1655   c->bs2        = a->bs2;
1656   c->mbs        = a->mbs;
1657   c->nbs        = a->nbs;
1658 
1659   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax);
1660   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen);
1661   for ( i=0; i<mbs; i++ ) {
1662     c->imax[i] = a->imax[i];
1663     c->ilen[i] = a->ilen[i];
1664   }
1665 
1666   /* allocate the matrix space */
1667   c->singlemalloc = 1;
1668   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1669   c->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(c->a);
1670   c->j = (int *) (c->a + nz*bs2);
1671   c->i = c->j + nz;
1672   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1673   if (mbs > 0) {
1674     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1675     if (cpvalues == MAT_COPY_VALUES) {
1676       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1677     } else {
1678       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1679     }
1680   }
1681 
1682   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1683   c->sorted      = a->sorted;
1684   c->roworiented = a->roworiented;
1685   c->nonew       = a->nonew;
1686 
1687   if (a->diag) {
1688     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(c->diag);
1689     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1690     for ( i=0; i<mbs; i++ ) {
1691       c->diag[i] = a->diag[i];
1692     }
1693   } else c->diag        = 0;
1694   c->nz                 = a->nz;
1695   c->maxnz              = a->maxnz;
1696   c->solve_work         = 0;
1697   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1698   c->mult_work          = 0;
1699   *B = C;
1700   ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1701   PetscFunctionReturn(0);
1702 }
1703 
1704 #undef __FUNC__
1705 #define __FUNC__ "MatLoad_SeqBAIJ"
1706 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1707 {
1708   Mat_SeqBAIJ  *a;
1709   Mat          B;
1710   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1711   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1712   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1713   int          *masked, nmask,tmp,bs2,ishift;
1714   Scalar       *aa;
1715   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1716 
1717   PetscFunctionBegin;
1718   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1719   bs2  = bs*bs;
1720 
1721   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1722   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor");
1723   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1724   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1725   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object");
1726   M = header[1]; N = header[2]; nz = header[3];
1727 
1728   if (header[3] < 0) {
1729     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ");
1730   }
1731 
1732   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
1733 
1734   /*
1735      This code adds extra rows to make sure the number of rows is
1736     divisible by the blocksize
1737   */
1738   mbs        = M/bs;
1739   extra_rows = bs - M + bs*(mbs);
1740   if (extra_rows == bs) extra_rows = 0;
1741   else                  mbs++;
1742   if (extra_rows) {
1743     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1744   }
1745 
1746   /* read in row lengths */
1747   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1748   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1749   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1750 
1751   /* read in column indices */
1752   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) );CHKPTRQ(jj);
1753   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1754   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1755 
1756   /* loop over row lengths determining block row lengths */
1757   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1758   ierr        = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1759   mask        = (int *) PetscMalloc( 2*mbs*sizeof(int) );CHKPTRQ(mask);
1760   ierr        = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1761   masked      = mask + mbs;
1762   rowcount    = 0; nzcount = 0;
1763   for ( i=0; i<mbs; i++ ) {
1764     nmask = 0;
1765     for ( j=0; j<bs; j++ ) {
1766       kmax = rowlengths[rowcount];
1767       for ( k=0; k<kmax; k++ ) {
1768         tmp = jj[nzcount++]/bs;
1769         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1770       }
1771       rowcount++;
1772     }
1773     browlengths[i] += nmask;
1774     /* zero out the mask elements we set */
1775     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1776   }
1777 
1778   /* create our matrix */
1779   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1780   B = *A;
1781   a = (Mat_SeqBAIJ *) B->data;
1782 
1783   /* set matrix "i" values */
1784   a->i[0] = 0;
1785   for ( i=1; i<= mbs; i++ ) {
1786     a->i[i]      = a->i[i-1] + browlengths[i-1];
1787     a->ilen[i-1] = browlengths[i-1];
1788   }
1789   a->nz         = 0;
1790   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1791 
1792   /* read in nonzero values */
1793   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1794   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1795   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1796 
1797   /* set "a" and "j" values into matrix */
1798   nzcount = 0; jcount = 0;
1799   for ( i=0; i<mbs; i++ ) {
1800     nzcountb = nzcount;
1801     nmask    = 0;
1802     for ( j=0; j<bs; j++ ) {
1803       kmax = rowlengths[i*bs+j];
1804       for ( k=0; k<kmax; k++ ) {
1805         tmp = jj[nzcount++]/bs;
1806 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1807       }
1808       rowcount++;
1809     }
1810     /* sort the masked values */
1811     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1812 
1813     /* set "j" values into matrix */
1814     maskcount = 1;
1815     for ( j=0; j<nmask; j++ ) {
1816       a->j[jcount++]  = masked[j];
1817       mask[masked[j]] = maskcount++;
1818     }
1819     /* set "a" values into matrix */
1820     ishift = bs2*a->i[i];
1821     for ( j=0; j<bs; j++ ) {
1822       kmax = rowlengths[i*bs+j];
1823       for ( k=0; k<kmax; k++ ) {
1824         tmp       = jj[nzcountb]/bs ;
1825         block     = mask[tmp] - 1;
1826         point     = jj[nzcountb] - bs*tmp;
1827         idx       = ishift + bs2*block + j + bs*point;
1828         a->a[idx] = aa[nzcountb++];
1829       }
1830     }
1831     /* zero out the mask elements we set */
1832     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1833   }
1834   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix");
1835 
1836   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1837   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1838   ierr = PetscFree(aa);CHKERRQ(ierr);
1839   ierr = PetscFree(jj);CHKERRQ(ierr);
1840   ierr = PetscFree(mask);CHKERRQ(ierr);
1841 
1842   B->assembled = PETSC_TRUE;
1843 
1844   ierr = MatView_Private(B);CHKERRQ(ierr);
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 
1849 
1850