xref: /petsc/src/mat/impls/baij/seq/baij.c (revision c4ac5f5fc01a91c2bb048584e85ba147c7d8a938)
1 /*$Id: baij.c,v 1.198 2000/01/26 22:08:15 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "sys.h"
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNC__
18 #define __FUNC__ "MatMissingDiagonal_SeqBAIJ"
19 int MatMissingDiagonal_SeqBAIJ(Mat A)
20 {
21   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) {
29       SETERRQ1(1,1,"Matrix is missing diagonal number %d",i);
30     }
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 #undef __FUNC__
36 #define __FUNC__ "MatMarkDiagonal_SeqBAIJ"
37 int MatMarkDiagonal_SeqBAIJ(Mat A)
38 {
39   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
40   int         i,j,*diag,m = a->mbs;
41 
42   PetscFunctionBegin;
43   if (a->diag) PetscFunctionReturn(0);
44 
45   diag = (int*)PetscMalloc((m+1)*sizeof(int));CHKPTRQ(diag);
46   PLogObjectMemory(A,(m+1)*sizeof(int));
47   for (i=0; i<m; i++) {
48     diag[i] = a->i[i+1];
49     for (j=a->i[i]; j<a->i[i+1]; j++) {
50       if (a->j[j] == i) {
51         diag[i] = j;
52         break;
53       }
54     }
55   }
56   a->diag = diag;
57   PetscFunctionReturn(0);
58 }
59 
60 
61 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
62 
63 #undef __FUNC__
64 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
65 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
66 {
67   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
68   int         ierr,n = a->mbs,i;
69 
70   PetscFunctionBegin;
71   *nn = n;
72   if (!ia) PetscFunctionReturn(0);
73   if (symmetric) {
74     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
75   } else if (oshift == 1) {
76     /* temporarily add 1 to i and j indices */
77     int nz = a->i[n] + 1;
78     for (i=0; i<nz; i++) a->j[i]++;
79     for (i=0; i<n+1; i++) a->i[i]++;
80     *ia = a->i; *ja = a->j;
81   } else {
82     *ia = a->i; *ja = a->j;
83   }
84 
85   PetscFunctionReturn(0);
86 }
87 
88 #undef __FUNC__
89 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
90 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
91                                 PetscTruth *done)
92 {
93   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
94   int         i,n = a->mbs,ierr;
95 
96   PetscFunctionBegin;
97   if (!ia) PetscFunctionReturn(0);
98   if (symmetric) {
99     ierr = PetscFree(*ia);CHKERRQ(ierr);
100     ierr = PetscFree(*ja);CHKERRQ(ierr);
101   } else if (oshift == 1) {
102     int nz = a->i[n];
103     for (i=0; i<nz; i++) a->j[i]--;
104     for (i=0; i<n+1; i++) a->i[i]--;
105   }
106   PetscFunctionReturn(0);
107 }
108 
109 #undef __FUNC__
110 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
111 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
112 {
113   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
114 
115   PetscFunctionBegin;
116   *bs = baij->bs;
117   PetscFunctionReturn(0);
118 }
119 
120 
121 #undef __FUNC__
122 #define __FUNC__ "MatDestroy_SeqBAIJ"
123 int MatDestroy_SeqBAIJ(Mat A)
124 {
125   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
126   int         ierr;
127 
128   PetscFunctionBegin;
129   if (A->mapping) {
130     ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr);
131   }
132   if (A->bmapping) {
133     ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr);
134   }
135   if (A->rmap) {
136     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
137   }
138   if (A->cmap) {
139     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
140   }
141 #if defined(PETSC_USE_LOG)
142   PLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
143 #endif
144   ierr = PetscFree(a->a);CHKERRQ(ierr);
145   if (!a->singlemalloc) {
146     ierr = PetscFree(a->i);CHKERRQ(ierr);
147     ierr = PetscFree(a->j);CHKERRQ(ierr);
148   }
149   if (a->row) {
150     ierr = ISDestroy(a->row);CHKERRQ(ierr);
151   }
152   if (a->col) {
153     ierr = ISDestroy(a->col);CHKERRQ(ierr);
154   }
155   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
156   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
157   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
158   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
159   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
160   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
161   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
162   ierr = PetscFree(a);CHKERRQ(ierr);
163   PLogObjectDestroy(A);
164   PetscHeaderDestroy(A);
165   PetscFunctionReturn(0);
166 }
167 
168 #undef __FUNC__
169 #define __FUNC__ "MatSetOption_SeqBAIJ"
170 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
171 {
172   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
173 
174   PetscFunctionBegin;
175   if      (op == MAT_ROW_ORIENTED)                 a->roworiented    = PETSC_TRUE;
176   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented    = PETSC_FALSE;
177   else if (op == MAT_COLUMNS_SORTED)               a->sorted         = PETSC_TRUE;
178   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted         = PETSC_FALSE;
179   else if (op == MAT_KEEP_ZEROED_ROWS)             a->keepzeroedrows = PETSC_TRUE;
180   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew          = 1;
181   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew          = -1;
182   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew          = -2;
183   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew          = 0;
184   else if (op == MAT_ROWS_SORTED ||
185            op == MAT_ROWS_UNSORTED ||
186            op == MAT_SYMMETRIC ||
187            op == MAT_STRUCTURALLY_SYMMETRIC ||
188            op == MAT_YES_NEW_DIAGONALS ||
189            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
190            op == MAT_USE_HASH_TABLE) {
191     PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
192   } else if (op == MAT_NO_NEW_DIAGONALS) {
193     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
194   } else {
195     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
196   }
197   PetscFunctionReturn(0);
198 }
199 
200 
201 #undef __FUNC__
202 #define __FUNC__ "MatGetSize_SeqBAIJ"
203 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
204 {
205   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
206 
207   PetscFunctionBegin;
208   if (m) *m = a->m;
209   if (n) *n = a->n;
210   PetscFunctionReturn(0);
211 }
212 
213 #undef __FUNC__
214 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
215 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
216 {
217   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
218 
219   PetscFunctionBegin;
220   if (m) *m = 0;
221   if (n) *n = a->m;
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNC__
226 #define __FUNC__ "MatGetRow_SeqBAIJ"
227 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
228 {
229   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
230   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
231   MatScalar    *aa,*aa_i;
232   Scalar       *v_i;
233 
234   PetscFunctionBegin;
235   bs  = a->bs;
236   ai  = a->i;
237   aj  = a->j;
238   aa  = a->a;
239   bs2 = a->bs2;
240 
241   if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range");
242 
243   bn  = row/bs;   /* Block number */
244   bp  = row % bs; /* Block Position */
245   M   = ai[bn+1] - ai[bn];
246   *nz = bs*M;
247 
248   if (v) {
249     *v = 0;
250     if (*nz) {
251       *v = (Scalar*)PetscMalloc((*nz)*sizeof(Scalar));CHKPTRQ(*v);
252       for (i=0; i<M; i++) { /* for each block in the block row */
253         v_i  = *v + i*bs;
254         aa_i = aa + bs2*(ai[bn] + i);
255         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
256       }
257     }
258   }
259 
260   if (idx) {
261     *idx = 0;
262     if (*nz) {
263       *idx = (int*)PetscMalloc((*nz)*sizeof(int));CHKPTRQ(*idx);
264       for (i=0; i<M; i++) { /* for each block in the block row */
265         idx_i = *idx + i*bs;
266         itmp  = bs*aj[ai[bn] + i];
267         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
268       }
269     }
270   }
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNC__
275 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
276 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
277 {
278   int ierr;
279 
280   PetscFunctionBegin;
281   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
282   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
283   PetscFunctionReturn(0);
284 }
285 
286 #undef __FUNC__
287 #define __FUNC__ "MatTranspose_SeqBAIJ"
288 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
289 {
290   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
291   Mat         C;
292   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
293   int         *rows,*cols,bs2=a->bs2,refct;
294   MatScalar   *array = a->a;
295 
296   PetscFunctionBegin;
297   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place");
298   col  = (int*)PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col);
299   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
300 
301   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
302   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
303   ierr = PetscFree(col);CHKERRQ(ierr);
304   rows = (int*)PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows);
305   cols = rows + bs;
306   for (i=0; i<mbs; i++) {
307     cols[0] = i*bs;
308     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
309     len = ai[i+1] - ai[i];
310     for (j=0; j<len; j++) {
311       rows[0] = (*aj++)*bs;
312       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
313       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
314       array += bs2;
315     }
316   }
317   ierr = PetscFree(rows);CHKERRQ(ierr);
318 
319   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321 
322   if (B) {
323     *B = C;
324   } else {
325     PetscOps *Abops;
326     MatOps   Aops;
327 
328     /* This isn't really an in-place transpose */
329     ierr = PetscFree(a->a);CHKERRQ(ierr);
330     if (!a->singlemalloc) {
331       ierr = PetscFree(a->i);CHKERRQ(ierr);
332       ierr = PetscFree(a->j);CHKERRQ(ierr);
333     }
334     if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
335     if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
336     if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
337     if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
338     ierr = PetscFree(a);CHKERRQ(ierr);
339 
340 
341     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
342     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
343 
344     /*
345        This is horrible, horrible code. We need to keep the
346       A pointers for the bops and ops but copy everything
347       else from C.
348     */
349     Abops    = A->bops;
350     Aops     = A->ops;
351     refct    = A->refct;
352     ierr     = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr);
353     A->bops  = Abops;
354     A->ops   = Aops;
355     A->qlist = 0;
356     A->refct = refct;
357 
358     PetscHeaderDestroy(C);
359   }
360   PetscFunctionReturn(0);
361 }
362 
363 #undef __FUNC__
364 #define __FUNC__ "MatView_SeqBAIJ_Binary"
365 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
366 {
367   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
368   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
369   Scalar      *aa;
370   FILE        *file;
371 
372   PetscFunctionBegin;
373   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
374   col_lens = (int*)PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
375   col_lens[0] = MAT_COOKIE;
376 
377   col_lens[1] = a->m;
378   col_lens[2] = a->n;
379   col_lens[3] = a->nz*bs2;
380 
381   /* store lengths of each row and write (including header) to file */
382   count = 0;
383   for (i=0; i<a->mbs; i++) {
384     for (j=0; j<bs; j++) {
385       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
386     }
387   }
388   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr);
389   ierr = PetscFree(col_lens);CHKERRQ(ierr);
390 
391   /* store column indices (zero start index) */
392   jj = (int*)PetscMalloc((a->nz+1)*bs2*sizeof(int));CHKPTRQ(jj);
393   count = 0;
394   for (i=0; i<a->mbs; i++) {
395     for (j=0; j<bs; j++) {
396       for (k=a->i[i]; k<a->i[i+1]; k++) {
397         for (l=0; l<bs; l++) {
398           jj[count++] = bs*a->j[k] + l;
399         }
400       }
401     }
402   }
403   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
404   ierr = PetscFree(jj);CHKERRQ(ierr);
405 
406   /* store nonzero values */
407   aa = (Scalar*)PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa);
408   count = 0;
409   for (i=0; i<a->mbs; i++) {
410     for (j=0; j<bs; j++) {
411       for (k=a->i[i]; k<a->i[i+1]; k++) {
412         for (l=0; l<bs; l++) {
413           aa[count++] = a->a[bs2*k + l*bs + j];
414         }
415       }
416     }
417   }
418   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
419   ierr = PetscFree(aa);CHKERRQ(ierr);
420 
421   ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
422   if (file) {
423     fprintf(file,"-matload_block_size %d\n",a->bs);
424   }
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNC__
429 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
430 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
431 {
432   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
433   int         ierr,i,j,format,bs = a->bs,k,l,bs2=a->bs2;
434   char        *outputname;
435 
436   PetscFunctionBegin;
437   ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr);
438   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
439   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
440     ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
441   } else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
442     SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported");
443   } else if (format == VIEWER_FORMAT_ASCII_COMMON) {
444     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
445     for (i=0; i<a->mbs; i++) {
446       for (j=0; j<bs; j++) {
447         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
448         for (k=a->i[i]; k<a->i[i+1]; k++) {
449           for (l=0; l<bs; l++) {
450 #if defined(PETSC_USE_COMPLEX)
451             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
452               ierr = ViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l,
453                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
454             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
455               ierr = ViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l,
456                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
457             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
458               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
459             }
460 #else
461             if (a->a[bs2*k + l*bs + j] != 0.0) {
462               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
463             }
464 #endif
465           }
466         }
467         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
468       }
469     }
470     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
471   } else {
472     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
473     for (i=0; i<a->mbs; i++) {
474       for (j=0; j<bs; j++) {
475         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
476         for (k=a->i[i]; k<a->i[i+1]; k++) {
477           for (l=0; l<bs; l++) {
478 #if defined(PETSC_USE_COMPLEX)
479             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
480               ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
481                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
482             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
483               ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
484                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
485             } else {
486               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
487             }
488 #else
489             ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
490 #endif
491           }
492         }
493         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
494       }
495     }
496     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
497   }
498   ierr = ViewerFlush(viewer);CHKERRQ(ierr);
499   PetscFunctionReturn(0);
500 }
501 
502 #undef __FUNC__
503 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom"
504 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa)
505 {
506   Mat          A = (Mat) Aa;
507   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
508   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
509   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
510   MatScalar    *aa;
511   Viewer       viewer;
512 
513   PetscFunctionBegin;
514 
515   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
516   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
517 
518   ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
519 
520   /* loop over matrix elements drawing boxes */
521   color = DRAW_BLUE;
522   for (i=0,row=0; i<mbs; i++,row+=bs) {
523     for (j=a->i[i]; j<a->i[i+1]; j++) {
524       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
525       x_l = a->j[j]*bs; x_r = x_l + 1.0;
526       aa = a->a + j*bs2;
527       for (k=0; k<bs; k++) {
528         for (l=0; l<bs; l++) {
529           if (PetscRealPart(*aa++) >=  0.) continue;
530           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
531         }
532       }
533     }
534   }
535   color = DRAW_CYAN;
536   for (i=0,row=0; i<mbs; i++,row+=bs) {
537     for (j=a->i[i]; j<a->i[i+1]; j++) {
538       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
539       x_l = a->j[j]*bs; x_r = x_l + 1.0;
540       aa = a->a + j*bs2;
541       for (k=0; k<bs; k++) {
542         for (l=0; l<bs; l++) {
543           if (PetscRealPart(*aa++) != 0.) continue;
544           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
545         }
546       }
547     }
548   }
549 
550   color = DRAW_RED;
551   for (i=0,row=0; i<mbs; i++,row+=bs) {
552     for (j=a->i[i]; j<a->i[i+1]; j++) {
553       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
554       x_l = a->j[j]*bs; x_r = x_l + 1.0;
555       aa = a->a + j*bs2;
556       for (k=0; k<bs; k++) {
557         for (l=0; l<bs; l++) {
558           if (PetscRealPart(*aa++) <= 0.) continue;
559           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
560         }
561       }
562     }
563   }
564   PetscFunctionReturn(0);
565 }
566 
567 #undef __FUNC__
568 #define __FUNC__ "MatView_SeqBAIJ_Draw"
569 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
570 {
571   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
572   int          ierr;
573   PetscReal    xl,yl,xr,yr,w,h;
574   Draw         draw;
575   PetscTruth   isnull;
576 
577   PetscFunctionBegin;
578 
579   ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
580   ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
581 
582   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
583   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
584   xr += w;    yr += h;  xl = -w;     yl = -h;
585   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
586   ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
587   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
590 
591 #undef __FUNC__
592 #define __FUNC__ "MatView_SeqBAIJ"
593 int MatView_SeqBAIJ(Mat A,Viewer viewer)
594 {
595   int        ierr;
596   PetscTruth issocket,isascii,isbinary,isdraw;
597 
598   PetscFunctionBegin;
599   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
600   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
601   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
602   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
603   if (issocket) {
604     SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported");
605   } else if (isascii){
606     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
607   } else if (isbinary) {
608     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
609   } else if (isdraw) {
610     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
611   } else {
612     SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
613   }
614   PetscFunctionReturn(0);
615 }
616 
617 
618 #undef __FUNC__
619 #define __FUNC__ "MatGetValues_SeqBAIJ"
620 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
621 {
622   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
623   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
624   int        *ai = a->i,*ailen = a->ilen;
625   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
626   MatScalar  *ap,*aa = a->a,zero = 0.0;
627 
628   PetscFunctionBegin;
629   for (k=0; k<m; k++) { /* loop over rows */
630     row  = im[k]; brow = row/bs;
631     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
632     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
633     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
634     nrow = ailen[brow];
635     for (l=0; l<n; l++) { /* loop over columns */
636       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
637       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
638       col  = in[l] ;
639       bcol = col/bs;
640       cidx = col%bs;
641       ridx = row%bs;
642       high = nrow;
643       low  = 0; /* assume unsorted */
644       while (high-low > 5) {
645         t = (low+high)/2;
646         if (rp[t] > bcol) high = t;
647         else             low  = t;
648       }
649       for (i=low; i<high; i++) {
650         if (rp[i] > bcol) break;
651         if (rp[i] == bcol) {
652           *v++ = ap[bs2*i+bs*cidx+ridx];
653           goto finished;
654         }
655       }
656       *v++ = zero;
657       finished:;
658     }
659   }
660   PetscFunctionReturn(0);
661 }
662 
663 
664 #undef __FUNC__
665 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
666 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
667 {
668   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
669   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
670   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
671   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
672   Scalar      *value = v;
673   MatScalar   *ap,*aa=a->a,*bap;
674 
675   PetscFunctionBegin;
676   if (roworiented) {
677     stepval = (n-1)*bs;
678   } else {
679     stepval = (m-1)*bs;
680   }
681   for (k=0; k<m; k++) { /* loop over added rows */
682     row  = im[k];
683     if (row < 0) continue;
684 #if defined(PETSC_USE_BOPT_g)
685     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
686 #endif
687     rp   = aj + ai[row];
688     ap   = aa + bs2*ai[row];
689     rmax = imax[row];
690     nrow = ailen[row];
691     low  = 0;
692     for (l=0; l<n; l++) { /* loop over added columns */
693       if (in[l] < 0) continue;
694 #if defined(PETSC_USE_BOPT_g)
695       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
696 #endif
697       col = in[l];
698       if (roworiented) {
699         value = v + k*(stepval+bs)*bs + l*bs;
700       } else {
701         value = v + l*(stepval+bs)*bs + k*bs;
702       }
703       if (!sorted) low = 0; high = nrow;
704       while (high-low > 7) {
705         t = (low+high)/2;
706         if (rp[t] > col) high = t;
707         else             low  = t;
708       }
709       for (i=low; i<high; i++) {
710         if (rp[i] > col) break;
711         if (rp[i] == col) {
712           bap  = ap +  bs2*i;
713           if (roworiented) {
714             if (is == ADD_VALUES) {
715               for (ii=0; ii<bs; ii++,value+=stepval) {
716                 for (jj=ii; jj<bs2; jj+=bs) {
717                   bap[jj] += *value++;
718                 }
719               }
720             } else {
721               for (ii=0; ii<bs; ii++,value+=stepval) {
722                 for (jj=ii; jj<bs2; jj+=bs) {
723                   bap[jj] = *value++;
724                 }
725               }
726             }
727           } else {
728             if (is == ADD_VALUES) {
729               for (ii=0; ii<bs; ii++,value+=stepval) {
730                 for (jj=0; jj<bs; jj++) {
731                   *bap++ += *value++;
732                 }
733               }
734             } else {
735               for (ii=0; ii<bs; ii++,value+=stepval) {
736                 for (jj=0; jj<bs; jj++) {
737                   *bap++  = *value++;
738                 }
739               }
740             }
741           }
742           goto noinsert2;
743         }
744       }
745       if (nonew == 1) goto noinsert2;
746       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
747       if (nrow >= rmax) {
748         /* there is no extra room in row, therefore enlarge */
749         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
750         MatScalar *new_a;
751 
752         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
753 
754         /* malloc new storage space */
755         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
756         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a);
757         new_j   = (int*)(new_a + bs2*new_nz);
758         new_i   = new_j + new_nz;
759 
760         /* copy over old data into new slots */
761         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
762         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
763         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
764         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
765         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
766         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
767         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
768         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
769         /* free up old matrix storage */
770         ierr = PetscFree(a->a);CHKERRQ(ierr);
771         if (!a->singlemalloc) {
772           ierr = PetscFree(a->i);CHKERRQ(ierr);
773           ierr = PetscFree(a->j);CHKERRQ(ierr);
774         }
775         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
776         a->singlemalloc = PETSC_TRUE;
777 
778         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
779         rmax = imax[row] = imax[row] + CHUNKSIZE;
780         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
781         a->maxnz += bs2*CHUNKSIZE;
782         a->reallocs++;
783         a->nz++;
784       }
785       N = nrow++ - 1;
786       /* shift up all the later entries in this row */
787       for (ii=N; ii>=i; ii--) {
788         rp[ii+1] = rp[ii];
789         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
790       }
791       if (N >= i) {
792         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
793       }
794       rp[i] = col;
795       bap   = ap +  bs2*i;
796       if (roworiented) {
797         for (ii=0; ii<bs; ii++,value+=stepval) {
798           for (jj=ii; jj<bs2; jj+=bs) {
799             bap[jj] = *value++;
800           }
801         }
802       } else {
803         for (ii=0; ii<bs; ii++,value+=stepval) {
804           for (jj=0; jj<bs; jj++) {
805             *bap++  = *value++;
806           }
807         }
808       }
809       noinsert2:;
810       low = i;
811     }
812     ailen[row] = nrow;
813   }
814   PetscFunctionReturn(0);
815 }
816 
817 
818 #undef __FUNC__
819 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
820 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
821 {
822   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
823   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
824   int        m = a->m,*ip,N,*ailen = a->ilen;
825   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
826   MatScalar  *aa = a->a,*ap;
827 
828   PetscFunctionBegin;
829   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
830 
831   if (m) rmax = ailen[0];
832   for (i=1; i<mbs; i++) {
833     /* move each row back by the amount of empty slots (fshift) before it*/
834     fshift += imax[i-1] - ailen[i-1];
835     rmax   = PetscMax(rmax,ailen[i]);
836     if (fshift) {
837       ip = aj + ai[i]; ap = aa + bs2*ai[i];
838       N = ailen[i];
839       for (j=0; j<N; j++) {
840         ip[j-fshift] = ip[j];
841         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
842       }
843     }
844     ai[i] = ai[i-1] + ailen[i-1];
845   }
846   if (mbs) {
847     fshift += imax[mbs-1] - ailen[mbs-1];
848     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
849   }
850   /* reset ilen and imax for each row */
851   for (i=0; i<mbs; i++) {
852     ailen[i] = imax[i] = ai[i+1] - ai[i];
853   }
854   a->nz = ai[mbs];
855 
856   /* diagonals may have moved, so kill the diagonal pointers */
857   if (fshift && a->diag) {
858     ierr = PetscFree(a->diag);CHKERRQ(ierr);
859     PLogObjectMemory(A,-(m+1)*sizeof(int));
860     a->diag = 0;
861   }
862   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
863            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
864   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
865            a->reallocs);
866   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
867   a->reallocs          = 0;
868   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
869 
870   PetscFunctionReturn(0);
871 }
872 
873 
874 
875 /*
876    This function returns an array of flags which indicate the locations of contiguous
877    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
878    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
879    Assume: sizes should be long enough to hold all the values.
880 */
881 #undef __FUNC__
882 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks"
883 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
884 {
885   int        i,j,k,row;
886   PetscTruth flg;
887 
888   PetscFunctionBegin;
889   for (i=0,j=0; i<n; j++) {
890     row = idx[i];
891     if (row%bs!=0) { /* Not the begining of a block */
892       sizes[j] = 1;
893       i++;
894     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
895       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
896       i++;
897     } else { /* Begining of the block, so check if the complete block exists */
898       flg = PETSC_TRUE;
899       for (k=1; k<bs; k++) {
900         if (row+k != idx[i+k]) { /* break in the block */
901           flg = PETSC_FALSE;
902           break;
903         }
904       }
905       if (flg == PETSC_TRUE) { /* No break in the bs */
906         sizes[j] = bs;
907         i+= bs;
908       } else {
909         sizes[j] = 1;
910         i++;
911       }
912     }
913   }
914   *bs_max = j;
915   PetscFunctionReturn(0);
916 }
917 
918 #undef __FUNC__
919 #define __FUNC__ "MatZeroRows_SeqBAIJ"
920 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag)
921 {
922   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
923   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
924   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
925   Scalar      zero = 0.0;
926   MatScalar   *aa;
927 
928   PetscFunctionBegin;
929   /* Make a copy of the IS and  sort it */
930   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
931   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
932 
933   /* allocate memory for rows,sizes */
934   rows  = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows);
935   sizes = rows + is_n;
936 
937   /* initialize copy IS valurs to rows, and sort them */
938   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
939   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
940   if (baij->keepzeroedrows) {
941     for (i=0; i<is_n; i++) { sizes[i] = 1; }
942     bs_max = is_n;
943   } else {
944     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
945   }
946   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
947 
948   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
949     row   = rows[j];
950     if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row);
951     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
952     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
953     if (sizes[i] == bs && !baij->keepzeroedrows) {
954       if (diag) {
955         if (baij->ilen[row/bs] > 0) {
956           baij->ilen[row/bs] = 1;
957           baij->j[baij->i[row/bs]] = row/bs;
958           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
959         }
960         /* Now insert all the diagoanl values for this bs */
961         for (k=0; k<bs; k++) {
962           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
963         }
964       } else { /* (!diag) */
965         baij->ilen[row/bs] = 0;
966       } /* end (!diag) */
967     } else { /* (sizes[i] != bs) */
968 #if defined (PETSC_USE_DEBUG)
969       if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1");
970 #endif
971       for (k=0; k<count; k++) {
972         aa[0] = zero;
973         aa+=bs;
974       }
975       if (diag) {
976         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
977       }
978     }
979   }
980 
981   ierr = PetscFree(rows);CHKERRQ(ierr);
982   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
983   PetscFunctionReturn(0);
984 }
985 
986 #undef __FUNC__
987 #define __FUNC__ "MatSetValues_SeqBAIJ"
988 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
989 {
990   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
991   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
992   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
993   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
994   int         ridx,cidx,bs2=a->bs2,ierr;
995   MatScalar   *ap,value,*aa=a->a,*bap;
996 
997   PetscFunctionBegin;
998   for (k=0; k<m; k++) { /* loop over added rows */
999     row  = im[k]; brow = row/bs;
1000     if (row < 0) continue;
1001 #if defined(PETSC_USE_BOPT_g)
1002     if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m);
1003 #endif
1004     rp   = aj + ai[brow];
1005     ap   = aa + bs2*ai[brow];
1006     rmax = imax[brow];
1007     nrow = ailen[brow];
1008     low  = 0;
1009     for (l=0; l<n; l++) { /* loop over added columns */
1010       if (in[l] < 0) continue;
1011 #if defined(PETSC_USE_BOPT_g)
1012       if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n);
1013 #endif
1014       col = in[l]; bcol = col/bs;
1015       ridx = row % bs; cidx = col % bs;
1016       if (roworiented) {
1017         value = v[l + k*n];
1018       } else {
1019         value = v[k + l*m];
1020       }
1021       if (!sorted) low = 0; high = nrow;
1022       while (high-low > 7) {
1023         t = (low+high)/2;
1024         if (rp[t] > bcol) high = t;
1025         else              low  = t;
1026       }
1027       for (i=low; i<high; i++) {
1028         if (rp[i] > bcol) break;
1029         if (rp[i] == bcol) {
1030           bap  = ap +  bs2*i + bs*cidx + ridx;
1031           if (is == ADD_VALUES) *bap += value;
1032           else                  *bap  = value;
1033           goto noinsert1;
1034         }
1035       }
1036       if (nonew == 1) goto noinsert1;
1037       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1038       if (nrow >= rmax) {
1039         /* there is no extra room in row, therefore enlarge */
1040         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1041         MatScalar *new_a;
1042 
1043         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1044 
1045         /* Malloc new storage space */
1046         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1047         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a);
1048         new_j   = (int*)(new_a + bs2*new_nz);
1049         new_i   = new_j + new_nz;
1050 
1051         /* copy over old data into new slots */
1052         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1053         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1054         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1055         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1056         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1057         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1058         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1059         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1060         /* free up old matrix storage */
1061         ierr = PetscFree(a->a);CHKERRQ(ierr);
1062         if (!a->singlemalloc) {
1063           ierr = PetscFree(a->i);CHKERRQ(ierr);
1064           ierr = PetscFree(a->j);CHKERRQ(ierr);
1065         }
1066         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1067         a->singlemalloc = PETSC_TRUE;
1068 
1069         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1070         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1071         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1072         a->maxnz += bs2*CHUNKSIZE;
1073         a->reallocs++;
1074         a->nz++;
1075       }
1076       N = nrow++ - 1;
1077       /* shift up all the later entries in this row */
1078       for (ii=N; ii>=i; ii--) {
1079         rp[ii+1] = rp[ii];
1080         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1081       }
1082       if (N>=i) {
1083         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1084       }
1085       rp[i]                      = bcol;
1086       ap[bs2*i + bs*cidx + ridx] = value;
1087       noinsert1:;
1088       low = i;
1089     }
1090     ailen[brow] = nrow;
1091   }
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,PetscReal,Mat*);
1096 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,PetscReal);
1097 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1098 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1099 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1100 extern int MatMultTranspose_SeqBAIJ(Mat,Vec,Vec);
1101 extern int MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1102 extern int MatScale_SeqBAIJ(Scalar*,Mat);
1103 extern int MatNorm_SeqBAIJ(Mat,NormType,PetscReal *);
1104 extern int MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*);
1105 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1106 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1107 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1108 extern int MatZeroEntries_SeqBAIJ(Mat);
1109 
1110 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1111 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1112 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1113 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1114 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1115 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1116 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
1117 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1118 extern int MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec);
1119 extern int MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec);
1120 extern int MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec);
1121 extern int MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec);
1122 extern int MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec);
1123 extern int MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec);
1124 extern int MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec);
1125 
1126 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1127 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1128 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1129 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1130 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1131 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1132 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*);
1133 
1134 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1135 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1136 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1137 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1138 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1139 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec);
1140 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1141 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1142 
1143 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1144 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1145 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1146 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1147 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1148 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
1149 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1150 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1151 
1152 #undef __FUNC__
1153 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1154 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1155 {
1156   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1157   Mat         outA;
1158   int         ierr;
1159   PetscTruth  row_identity,col_identity;
1160 
1161   PetscFunctionBegin;
1162   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU");
1163   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1164   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1165   if (!row_identity || !col_identity) {
1166     SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU");
1167   }
1168 
1169   outA          = inA;
1170   inA->factor   = FACTOR_LU;
1171 
1172   if (!a->diag) {
1173     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1174   }
1175   /*
1176       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1177       for ILU(0) factorization with natural ordering
1178   */
1179   switch (a->bs) {
1180   case 1:
1181     inA->ops->solvetranspose   = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering;
1182     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1183   case 2:
1184     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1185     inA->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1186     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering;
1187     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1188     break;
1189   case 3:
1190     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1191     inA->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1192     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_3_NaturalOrdering;
1193     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1194     break;
1195   case 4:
1196     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1197     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1198     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_4_NaturalOrdering;
1199     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1200     break;
1201   case 5:
1202     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1203     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1204     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering;
1205     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1206     break;
1207   case 6:
1208     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1209     inA->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1210     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_6_NaturalOrdering;
1211     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1212     break;
1213   case 7:
1214     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1215     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering;
1216     inA->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1217     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1218     break;
1219   default:
1220     a->row        = row;
1221     a->col        = col;
1222     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1223     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1224 
1225     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1226     ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1227     PLogObjectParent(inA,a->icol);
1228 
1229     if (!a->solve_work) {
1230       a->solve_work = (Scalar*)PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1231       PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1232     }
1233   }
1234 
1235   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1236 
1237   PetscFunctionReturn(0);
1238 }
1239 #undef __FUNC__
1240 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1241 int MatPrintHelp_SeqBAIJ(Mat A)
1242 {
1243   static PetscTruth called = PETSC_FALSE;
1244   MPI_Comm          comm = A->comm;
1245   int               ierr;
1246 
1247   PetscFunctionBegin;
1248   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1249   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1250   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 EXTERN_C_BEGIN
1255 #undef __FUNC__
1256 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1257 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1258 {
1259   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1260   int         i,nz,n;
1261 
1262   PetscFunctionBegin;
1263   nz = baij->maxnz;
1264   n  = baij->n;
1265   for (i=0; i<nz; i++) {
1266     baij->j[i] = indices[i];
1267   }
1268   baij->nz = nz;
1269   for (i=0; i<n; i++) {
1270     baij->ilen[i] = baij->imax[i];
1271   }
1272 
1273   PetscFunctionReturn(0);
1274 }
1275 EXTERN_C_END
1276 
1277 #undef __FUNC__
1278 #define __FUNC__ "MatSeqBAIJSetColumnIndices"
1279 /*@
1280     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1281        in the matrix.
1282 
1283   Input Parameters:
1284 +  mat - the SeqBAIJ matrix
1285 -  indices - the column indices
1286 
1287   Level: advanced
1288 
1289   Notes:
1290     This can be called if you have precomputed the nonzero structure of the
1291   matrix and want to provide it to the matrix object to improve the performance
1292   of the MatSetValues() operation.
1293 
1294     You MUST have set the correct numbers of nonzeros per row in the call to
1295   MatCreateSeqBAIJ().
1296 
1297     MUST be called before any calls to MatSetValues();
1298 
1299 @*/
1300 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1301 {
1302   int ierr,(*f)(Mat,int *);
1303 
1304   PetscFunctionBegin;
1305   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1306   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr);
1307   if (f) {
1308     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1309   } else {
1310     SETERRQ(1,1,"Wrong type of matrix to set column indices");
1311   }
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 /* -------------------------------------------------------------------*/
1316 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1317        MatGetRow_SeqBAIJ,
1318        MatRestoreRow_SeqBAIJ,
1319        MatMult_SeqBAIJ_N,
1320        MatMultAdd_SeqBAIJ_N,
1321        MatMultTranspose_SeqBAIJ,
1322        MatMultTransposeAdd_SeqBAIJ,
1323        MatSolve_SeqBAIJ_N,
1324        0,
1325        0,
1326        0,
1327        MatLUFactor_SeqBAIJ,
1328        0,
1329        0,
1330        MatTranspose_SeqBAIJ,
1331        MatGetInfo_SeqBAIJ,
1332        MatEqual_SeqBAIJ,
1333        MatGetDiagonal_SeqBAIJ,
1334        MatDiagonalScale_SeqBAIJ,
1335        MatNorm_SeqBAIJ,
1336        0,
1337        MatAssemblyEnd_SeqBAIJ,
1338        0,
1339        MatSetOption_SeqBAIJ,
1340        MatZeroEntries_SeqBAIJ,
1341        MatZeroRows_SeqBAIJ,
1342        MatLUFactorSymbolic_SeqBAIJ,
1343        MatLUFactorNumeric_SeqBAIJ_N,
1344        0,
1345        0,
1346        MatGetSize_SeqBAIJ,
1347        MatGetSize_SeqBAIJ,
1348        MatGetOwnershipRange_SeqBAIJ,
1349        MatILUFactorSymbolic_SeqBAIJ,
1350        0,
1351        0,
1352        0,
1353        MatDuplicate_SeqBAIJ,
1354        0,
1355        0,
1356        MatILUFactor_SeqBAIJ,
1357        0,
1358        0,
1359        MatGetSubMatrices_SeqBAIJ,
1360        MatIncreaseOverlap_SeqBAIJ,
1361        MatGetValues_SeqBAIJ,
1362        0,
1363        MatPrintHelp_SeqBAIJ,
1364        MatScale_SeqBAIJ,
1365        0,
1366        0,
1367        0,
1368        MatGetBlockSize_SeqBAIJ,
1369        MatGetRowIJ_SeqBAIJ,
1370        MatRestoreRowIJ_SeqBAIJ,
1371        0,
1372        0,
1373        0,
1374        0,
1375        0,
1376        0,
1377        MatSetValuesBlocked_SeqBAIJ,
1378        MatGetSubMatrix_SeqBAIJ,
1379        0,
1380        0,
1381        MatGetMaps_Petsc};
1382 
1383 EXTERN_C_BEGIN
1384 #undef __FUNC__
1385 #define __FUNC__ "MatStoreValues_SeqBAIJ"
1386 int MatStoreValues_SeqBAIJ(Mat mat)
1387 {
1388   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1389   int         nz = aij->i[aij->m]*aij->bs*aij->bs2;
1390   int         ierr;
1391 
1392   PetscFunctionBegin;
1393   if (aij->nonew != 1) {
1394     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1395   }
1396 
1397   /* allocate space for values if not already there */
1398   if (!aij->saved_values) {
1399     aij->saved_values = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values);
1400   }
1401 
1402   /* copy values over */
1403   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1404   PetscFunctionReturn(0);
1405 }
1406 EXTERN_C_END
1407 
1408 EXTERN_C_BEGIN
1409 #undef __FUNC__
1410 #define __FUNC__ "MatRetrieveValues_SeqBAIJ"
1411 int MatRetrieveValues_SeqBAIJ(Mat mat)
1412 {
1413   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1414   int         nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr;
1415 
1416   PetscFunctionBegin;
1417   if (aij->nonew != 1) {
1418     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1419   }
1420   if (!aij->saved_values) {
1421     SETERRQ(1,1,"Must call MatStoreValues(A);first");
1422   }
1423 
1424   /* copy values over */
1425   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 EXTERN_C_END
1429 
1430 #undef __FUNC__
1431 #define __FUNC__ "MatCreateSeqBAIJ"
1432 /*@C
1433    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1434    compressed row) format.  For good matrix assembly performance the
1435    user should preallocate the matrix storage by setting the parameter nz
1436    (or the array nnz).  By setting these parameters accurately, performance
1437    during matrix assembly can be increased by more than a factor of 50.
1438 
1439    Collective on MPI_Comm
1440 
1441    Input Parameters:
1442 +  comm - MPI communicator, set to PETSC_COMM_SELF
1443 .  bs - size of block
1444 .  m - number of rows
1445 .  n - number of columns
1446 .  nz - number of block nonzeros per block row (same for all rows)
1447 -  nnz - array containing the number of block nonzeros in the various block rows
1448          (possibly different for each block row) or PETSC_NULL
1449 
1450    Output Parameter:
1451 .  A - the matrix
1452 
1453    Options Database Keys:
1454 .   -mat_no_unroll - uses code that does not unroll the loops in the
1455                      block calculations (much slower)
1456 .    -mat_block_size - size of the blocks to use
1457 
1458    Level: intermediate
1459 
1460    Notes:
1461    The block AIJ format is fully compatible with standard Fortran 77
1462    storage.  That is, the stored row and column indices can begin at
1463    either one (as in Fortran) or zero.  See the users' manual for details.
1464 
1465    Specify the preallocated storage with either nz or nnz (not both).
1466    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1467    allocation.  For additional details, see the users manual chapter on
1468    matrices.
1469 
1470 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1471 @*/
1472 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1473 {
1474   Mat         B;
1475   Mat_SeqBAIJ *b;
1476   int         i,len,ierr,mbs,nbs,bs2,size;
1477   PetscTruth  flg;
1478 
1479   PetscFunctionBegin;
1480   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1481   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1");
1482 
1483   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1484   mbs  = m/bs;
1485   nbs  = n/bs;
1486   bs2  = bs*bs;
1487 
1488   if (mbs*bs!=m || nbs*bs!=n) {
1489     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize");
1490   }
1491 
1492   if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz);
1493   if (nnz) {
1494     for (i=0; i<mbs; i++) {
1495       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1496     }
1497   }
1498 
1499   *A = 0;
1500   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView);
1501   PLogObjectCreate(B);
1502   B->data = (void*)(b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b);
1503   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1504   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1505   ierr    = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1506   if (!flg) {
1507     switch (bs) {
1508     case 1:
1509       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1510       B->ops->solve           = MatSolve_SeqBAIJ_1;
1511       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_1;
1512       B->ops->mult            = MatMult_SeqBAIJ_1;
1513       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1514       break;
1515     case 2:
1516       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1517       B->ops->solve           = MatSolve_SeqBAIJ_2;
1518       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_2;
1519       B->ops->mult            = MatMult_SeqBAIJ_2;
1520       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1521       break;
1522     case 3:
1523       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1524       B->ops->solve           = MatSolve_SeqBAIJ_3;
1525       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_3;
1526       B->ops->mult            = MatMult_SeqBAIJ_3;
1527       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1528       break;
1529     case 4:
1530       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1531       B->ops->solve           = MatSolve_SeqBAIJ_4;
1532       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_4;
1533       B->ops->mult            = MatMult_SeqBAIJ_4;
1534       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1535       break;
1536     case 5:
1537       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1538       B->ops->solve           = MatSolve_SeqBAIJ_5;
1539       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_5;
1540       B->ops->mult            = MatMult_SeqBAIJ_5;
1541       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1542       break;
1543     case 6:
1544       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1545       B->ops->solve           = MatSolve_SeqBAIJ_6;
1546       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_6;
1547       B->ops->mult            = MatMult_SeqBAIJ_6;
1548       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1549       break;
1550     case 7:
1551       B->ops->mult            = MatMult_SeqBAIJ_7;
1552       B->ops->solve           = MatSolve_SeqBAIJ_7;
1553       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_7;
1554       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1555       break;
1556     }
1557   }
1558   B->ops->destroy     = MatDestroy_SeqBAIJ;
1559   B->ops->view        = MatView_SeqBAIJ;
1560   B->factor           = 0;
1561   B->lupivotthreshold = 1.0;
1562   B->mapping          = 0;
1563   b->row              = 0;
1564   b->col              = 0;
1565   b->icol             = 0;
1566   b->reallocs         = 0;
1567   b->saved_values     = 0;
1568 
1569   b->m       = m; B->m = m; B->M = m;
1570   b->n       = n; B->n = n; B->N = n;
1571 
1572   ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr);
1573   ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr);
1574 
1575   b->mbs     = mbs;
1576   b->nbs     = nbs;
1577   b->imax    = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(b->imax);
1578   if (!nnz) {
1579     if (nz == PETSC_DEFAULT) nz = 5;
1580     else if (nz <= 0)        nz = 1;
1581     for (i=0; i<mbs; i++) b->imax[i] = nz;
1582     nz = nz*mbs;
1583   } else {
1584     nz = 0;
1585     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1586   }
1587 
1588   /* allocate the matrix space */
1589   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int);
1590   b->a  = (MatScalar*)PetscMalloc(len);CHKPTRQ(b->a);
1591   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1592   b->j  = (int*)(b->a + nz*bs2);
1593   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1594   b->i  = b->j + nz;
1595   b->singlemalloc = PETSC_TRUE;
1596 
1597   b->i[0] = 0;
1598   for (i=1; i<mbs+1; i++) {
1599     b->i[i] = b->i[i-1] + b->imax[i-1];
1600   }
1601 
1602   /* b->ilen will count nonzeros in each block row so far. */
1603   b->ilen = (int*)PetscMalloc((mbs+1)*sizeof(int));
1604   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1605   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1606 
1607   b->bs               = bs;
1608   b->bs2              = bs2;
1609   b->mbs              = mbs;
1610   b->nz               = 0;
1611   b->maxnz            = nz*bs2;
1612   b->sorted           = PETSC_FALSE;
1613   b->roworiented      = PETSC_TRUE;
1614   b->nonew            = 0;
1615   b->diag             = 0;
1616   b->solve_work       = 0;
1617   b->mult_work        = 0;
1618   b->spptr            = 0;
1619   B->info.nz_unneeded = (PetscReal)b->maxnz;
1620   b->keepzeroedrows   = PETSC_FALSE;
1621 
1622   *A = B;
1623   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1624   if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); }
1625 
1626   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1627                                      "MatStoreValues_SeqBAIJ",
1628                                      (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1629   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1630                                      "MatRetrieveValues_SeqBAIJ",
1631                                      (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1632   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1633                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1634                                      (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1635   PetscFunctionReturn(0);
1636 }
1637 
1638 #undef __FUNC__
1639 #define __FUNC__ "MatDuplicate_SeqBAIJ"
1640 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1641 {
1642   Mat         C;
1643   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1644   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1645 
1646   PetscFunctionBegin;
1647   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix");
1648 
1649   *B = 0;
1650   PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView);
1651   PLogObjectCreate(C);
1652   C->data           = (void*)(c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c);
1653   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1654   C->ops->destroy   = MatDestroy_SeqBAIJ;
1655   C->ops->view      = MatView_SeqBAIJ;
1656   C->factor         = A->factor;
1657   c->row            = 0;
1658   c->col            = 0;
1659   c->icol           = 0;
1660   c->saved_values   = 0;
1661   c->keepzeroedrows = a->keepzeroedrows;
1662   C->assembled      = PETSC_TRUE;
1663 
1664   c->m = C->m   = a->m;
1665   c->n = C->n   = a->n;
1666   C->M          = a->m;
1667   C->N          = a->n;
1668 
1669   c->bs         = a->bs;
1670   c->bs2        = a->bs2;
1671   c->mbs        = a->mbs;
1672   c->nbs        = a->nbs;
1673 
1674   c->imax       = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax);
1675   c->ilen       = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen);
1676   for (i=0; i<mbs; i++) {
1677     c->imax[i] = a->imax[i];
1678     c->ilen[i] = a->ilen[i];
1679   }
1680 
1681   /* allocate the matrix space */
1682   c->singlemalloc = PETSC_TRUE;
1683   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1684   c->a = (MatScalar*)PetscMalloc(len);CHKPTRQ(c->a);
1685   c->j = (int*)(c->a + nz*bs2);
1686   c->i = c->j + nz;
1687   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1688   if (mbs > 0) {
1689     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1690     if (cpvalues == MAT_COPY_VALUES) {
1691       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1692     } else {
1693       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1694     }
1695   }
1696 
1697   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1698   c->sorted      = a->sorted;
1699   c->roworiented = a->roworiented;
1700   c->nonew       = a->nonew;
1701 
1702   if (a->diag) {
1703     c->diag = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->diag);
1704     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1705     for (i=0; i<mbs; i++) {
1706       c->diag[i] = a->diag[i];
1707     }
1708   } else c->diag        = 0;
1709   c->nz                 = a->nz;
1710   c->maxnz              = a->maxnz;
1711   c->solve_work         = 0;
1712   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1713   c->mult_work          = 0;
1714   *B = C;
1715   ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 #undef __FUNC__
1720 #define __FUNC__ "MatLoad_SeqBAIJ"
1721 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1722 {
1723   Mat_SeqBAIJ  *a;
1724   Mat          B;
1725   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1726   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1727   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1728   int          *masked,nmask,tmp,bs2,ishift;
1729   Scalar       *aa;
1730   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1731 
1732   PetscFunctionBegin;
1733   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1734   bs2  = bs*bs;
1735 
1736   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1737   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor");
1738   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1739   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1740   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object");
1741   M = header[1]; N = header[2]; nz = header[3];
1742 
1743   if (header[3] < 0) {
1744     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ");
1745   }
1746 
1747   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
1748 
1749   /*
1750      This code adds extra rows to make sure the number of rows is
1751     divisible by the blocksize
1752   */
1753   mbs        = M/bs;
1754   extra_rows = bs - M + bs*(mbs);
1755   if (extra_rows == bs) extra_rows = 0;
1756   else                  mbs++;
1757   if (extra_rows) {
1758     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1759   }
1760 
1761   /* read in row lengths */
1762   rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1763   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1764   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1765 
1766   /* read in column indices */
1767   jj = (int*)PetscMalloc((nz+extra_rows)*sizeof(int));CHKPTRQ(jj);
1768   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1769   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1770 
1771   /* loop over row lengths determining block row lengths */
1772   browlengths = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1773   ierr        = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1774   mask        = (int*)PetscMalloc(2*mbs*sizeof(int));CHKPTRQ(mask);
1775   ierr        = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1776   masked      = mask + mbs;
1777   rowcount    = 0; nzcount = 0;
1778   for (i=0; i<mbs; i++) {
1779     nmask = 0;
1780     for (j=0; j<bs; j++) {
1781       kmax = rowlengths[rowcount];
1782       for (k=0; k<kmax; k++) {
1783         tmp = jj[nzcount++]/bs;
1784         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1785       }
1786       rowcount++;
1787     }
1788     browlengths[i] += nmask;
1789     /* zero out the mask elements we set */
1790     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1791   }
1792 
1793   /* create our matrix */
1794   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1795   B = *A;
1796   a = (Mat_SeqBAIJ*)B->data;
1797 
1798   /* set matrix "i" values */
1799   a->i[0] = 0;
1800   for (i=1; i<= mbs; i++) {
1801     a->i[i]      = a->i[i-1] + browlengths[i-1];
1802     a->ilen[i-1] = browlengths[i-1];
1803   }
1804   a->nz         = 0;
1805   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1806 
1807   /* read in nonzero values */
1808   aa = (Scalar*)PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1809   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1810   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1811 
1812   /* set "a" and "j" values into matrix */
1813   nzcount = 0; jcount = 0;
1814   for (i=0; i<mbs; i++) {
1815     nzcountb = nzcount;
1816     nmask    = 0;
1817     for (j=0; j<bs; j++) {
1818       kmax = rowlengths[i*bs+j];
1819       for (k=0; k<kmax; k++) {
1820         tmp = jj[nzcount++]/bs;
1821 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1822       }
1823       rowcount++;
1824     }
1825     /* sort the masked values */
1826     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1827 
1828     /* set "j" values into matrix */
1829     maskcount = 1;
1830     for (j=0; j<nmask; j++) {
1831       a->j[jcount++]  = masked[j];
1832       mask[masked[j]] = maskcount++;
1833     }
1834     /* set "a" values into matrix */
1835     ishift = bs2*a->i[i];
1836     for (j=0; j<bs; j++) {
1837       kmax = rowlengths[i*bs+j];
1838       for (k=0; k<kmax; k++) {
1839         tmp       = jj[nzcountb]/bs ;
1840         block     = mask[tmp] - 1;
1841         point     = jj[nzcountb] - bs*tmp;
1842         idx       = ishift + bs2*block + j + bs*point;
1843         a->a[idx] = aa[nzcountb++];
1844       }
1845     }
1846     /* zero out the mask elements we set */
1847     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1848   }
1849   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix");
1850 
1851   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1852   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1853   ierr = PetscFree(aa);CHKERRQ(ierr);
1854   ierr = PetscFree(jj);CHKERRQ(ierr);
1855   ierr = PetscFree(mask);CHKERRQ(ierr);
1856 
1857   B->assembled = PETSC_TRUE;
1858 
1859   ierr = MatView_Private(B);CHKERRQ(ierr);
1860   PetscFunctionReturn(0);
1861 }
1862 
1863 
1864 
1865