xref: /petsc/src/mat/impls/baij/seq/baij.c (revision c38d4ed214221df9ea04de46f7761bef149d00ff)
1 /*$Id: baij.c,v 1.188 1999/11/05 14:45:32 bsmith Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "sys.h"
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNC__
18 #define __FUNC__ "MatMissingDiag_SeqBAIJ"
19 int MatMissingDiag_SeqBAIJ(Mat A)
20 {
21   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
22   int         *diag = a->diag, *jj = a->j,i;
23 
24   PetscFunctionBegin;
25   for ( i=0; i<a->mbs; i++ ) {
26     if (jj[diag[i]] != i) {
27       SETERRQ1(1,1,"Matrix is missing diagonal number %d",i);
28     }
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNC__
34 #define __FUNC__ "MatMarkDiag_SeqBAIJ"
35 int MatMarkDiag_SeqBAIJ(Mat A)
36 {
37   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
38   int         i,j, *diag, m = a->mbs;
39 
40   PetscFunctionBegin;
41   diag = (int *) PetscMalloc( (m+1)*sizeof(int));CHKPTRQ(diag);
42   PLogObjectMemory(A,(m+1)*sizeof(int));
43   for ( i=0; i<m; i++ ) {
44     diag[i] = a->i[i+1];
45     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
46       if (a->j[j] == i) {
47         diag[i] = j;
48         break;
49       }
50     }
51   }
52   a->diag = diag;
53   PetscFunctionReturn(0);
54 }
55 
56 
57 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
58 
59 #undef __FUNC__
60 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
61 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
62                             PetscTruth *done)
63 {
64   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
65   int         ierr, n = a->mbs,i;
66 
67   PetscFunctionBegin;
68   *nn = n;
69   if (!ia) PetscFunctionReturn(0);
70   if (symmetric) {
71     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
72   } else if (oshift == 1) {
73     /* temporarily add 1 to i and j indices */
74     int nz = a->i[n] + 1;
75     for ( i=0; i<nz; i++ ) a->j[i]++;
76     for ( i=0; i<n+1; i++ ) a->i[i]++;
77     *ia = a->i; *ja = a->j;
78   } else {
79     *ia = a->i; *ja = a->j;
80   }
81 
82   PetscFunctionReturn(0);
83 }
84 
85 #undef __FUNC__
86 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
87 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
88                                 PetscTruth *done)
89 {
90   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
91   int         i,n = a->mbs,ierr;
92 
93   PetscFunctionBegin;
94   if (!ia) PetscFunctionReturn(0);
95   if (symmetric) {
96     ierr = PetscFree(*ia);CHKERRQ(ierr);
97     ierr = PetscFree(*ja);CHKERRQ(ierr);
98   } else if (oshift == 1) {
99     int nz = a->i[n];
100     for ( i=0; i<nz; i++ ) a->j[i]--;
101     for ( i=0; i<n+1; i++ ) a->i[i]--;
102   }
103   PetscFunctionReturn(0);
104 }
105 
106 #undef __FUNC__
107 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
108 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
109 {
110   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
111 
112   PetscFunctionBegin;
113   *bs = baij->bs;
114   PetscFunctionReturn(0);
115 }
116 
117 
118 #undef __FUNC__
119 #define __FUNC__ "MatDestroy_SeqBAIJ"
120 int MatDestroy_SeqBAIJ(Mat A)
121 {
122   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
123   int         ierr;
124 
125   PetscFunctionBegin;
126   if (A->mapping) {
127     ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr);
128   }
129   if (A->bmapping) {
130     ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr);
131   }
132   if (A->rmap) {
133     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
134   }
135   if (A->cmap) {
136     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
137   }
138 #if defined(PETSC_USE_LOG)
139   PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
140 #endif
141   ierr = PetscFree(a->a);CHKERRQ(ierr);
142   if (!a->singlemalloc) {
143     ierr = PetscFree(a->i);CHKERRQ(ierr);
144     ierr = PetscFree(a->j);CHKERRQ(ierr);
145   }
146   if (a->row) {
147     ierr = ISDestroy(a->row);CHKERRQ(ierr);
148   }
149   if (a->col) {
150     ierr = ISDestroy(a->col);CHKERRQ(ierr);
151   }
152   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
153   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
154   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
155   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
156   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
157   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
158   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
159   ierr = PetscFree(a);CHKERRQ(ierr);
160   PLogObjectDestroy(A);
161   PetscHeaderDestroy(A);
162   PetscFunctionReturn(0);
163 }
164 
165 #undef __FUNC__
166 #define __FUNC__ "MatSetOption_SeqBAIJ"
167 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
168 {
169   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
170 
171   PetscFunctionBegin;
172   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
173   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
174   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
175   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
176   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
177   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew       = -1;
178   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew       = -2;
179   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
180   else if (op == MAT_ROWS_SORTED ||
181            op == MAT_ROWS_UNSORTED ||
182            op == MAT_SYMMETRIC ||
183            op == MAT_STRUCTURALLY_SYMMETRIC ||
184            op == MAT_YES_NEW_DIAGONALS ||
185            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
186            op == MAT_USE_HASH_TABLE) {
187     PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
188   } else if (op == MAT_NO_NEW_DIAGONALS) {
189     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
190   } else {
191     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 
197 #undef __FUNC__
198 #define __FUNC__ "MatGetSize_SeqBAIJ"
199 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
200 {
201   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
202 
203   PetscFunctionBegin;
204   if (m) *m = a->m;
205   if (n) *n = a->n;
206   PetscFunctionReturn(0);
207 }
208 
209 #undef __FUNC__
210 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
211 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
212 {
213   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
214 
215   PetscFunctionBegin;
216   *m = 0; *n = a->m;
217   PetscFunctionReturn(0);
218 }
219 
220 #undef __FUNC__
221 #define __FUNC__ "MatGetRow_SeqBAIJ"
222 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
223 {
224   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ *) A->data;
225   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
226   MatScalar    *aa,*aa_i;
227   Scalar       *v_i;
228 
229   PetscFunctionBegin;
230   bs  = a->bs;
231   ai  = a->i;
232   aj  = a->j;
233   aa  = a->a;
234   bs2 = a->bs2;
235 
236   if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range");
237 
238   bn  = row/bs;   /* Block number */
239   bp  = row % bs; /* Block Position */
240   M   = ai[bn+1] - ai[bn];
241   *nz = bs*M;
242 
243   if (v) {
244     *v = 0;
245     if (*nz) {
246       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) );CHKPTRQ(*v);
247       for ( i=0; i<M; i++ ) { /* for each block in the block row */
248         v_i  = *v + i*bs;
249         aa_i = aa + bs2*(ai[bn] + i);
250         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
251       }
252     }
253   }
254 
255   if (idx) {
256     *idx = 0;
257     if (*nz) {
258       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) );CHKPTRQ(*idx);
259       for ( i=0; i<M; i++ ) { /* for each block in the block row */
260         idx_i = *idx + i*bs;
261         itmp  = bs*aj[ai[bn] + i];
262         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
263       }
264     }
265   }
266   PetscFunctionReturn(0);
267 }
268 
269 #undef __FUNC__
270 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
271 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
272 {
273   int ierr;
274 
275   PetscFunctionBegin;
276   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
277   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
278   PetscFunctionReturn(0);
279 }
280 
281 #undef __FUNC__
282 #define __FUNC__ "MatTranspose_SeqBAIJ"
283 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
284 {
285   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
286   Mat         C;
287   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
288   int         *rows,*cols,bs2=a->bs2;
289   MatScalar   *array = a->a;
290 
291   PetscFunctionBegin;
292   if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place");
293   col  = (int *) PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col);
294   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
295 
296   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
297   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
298   ierr = PetscFree(col);CHKERRQ(ierr);
299   rows = (int *) PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows);
300   cols = rows + bs;
301   for ( i=0; i<mbs; i++ ) {
302     cols[0] = i*bs;
303     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
304     len = ai[i+1] - ai[i];
305     for ( j=0; j<len; j++ ) {
306       rows[0] = (*aj++)*bs;
307       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
308       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
309       array += bs2;
310     }
311   }
312   ierr = PetscFree(rows);CHKERRQ(ierr);
313 
314   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
315   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
316 
317   if (B != PETSC_NULL) {
318     *B = C;
319   } else {
320     PetscOps *Abops;
321     MatOps   Aops;
322 
323     /* This isn't really an in-place transpose */
324     ierr = PetscFree(a->a);CHKERRQ(ierr);
325     if (!a->singlemalloc) {
326       ierr = PetscFree(a->i);CHKERRQ(ierr);
327       ierr = PetscFree(a->j);CHKERRQ(ierr);
328     }
329     if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
330     if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
331     if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
332     if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
333     ierr = PetscFree(a);CHKERRQ(ierr);
334 
335 
336     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
337     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
338 
339     /*
340        This is horrible, horrible code. We need to keep the
341       A pointers for the bops and ops but copy everything
342       else from C.
343     */
344     Abops    = A->bops;
345     Aops     = A->ops;
346     ierr     = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr);
347     A->bops  = Abops;
348     A->ops   = Aops;
349     A->qlist = 0;
350 
351     PetscHeaderDestroy(C);
352   }
353   PetscFunctionReturn(0);
354 }
355 
356 #undef __FUNC__
357 #define __FUNC__ "MatView_SeqBAIJ_Binary"
358 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
359 {
360   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
361   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
362   Scalar      *aa;
363   FILE        *file;
364 
365   PetscFunctionBegin;
366   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
367   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
368   col_lens[0] = MAT_COOKIE;
369 
370   col_lens[1] = a->m;
371   col_lens[2] = a->n;
372   col_lens[3] = a->nz*bs2;
373 
374   /* store lengths of each row and write (including header) to file */
375   count = 0;
376   for ( i=0; i<a->mbs; i++ ) {
377     for ( j=0; j<bs; j++ ) {
378       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
379     }
380   }
381   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr);
382   ierr = PetscFree(col_lens);CHKERRQ(ierr);
383 
384   /* store column indices (zero start index) */
385   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) );CHKPTRQ(jj);
386   count = 0;
387   for ( i=0; i<a->mbs; i++ ) {
388     for ( j=0; j<bs; j++ ) {
389       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
390         for ( l=0; l<bs; l++ ) {
391           jj[count++] = bs*a->j[k] + l;
392         }
393       }
394     }
395   }
396   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
397   ierr = PetscFree(jj);CHKERRQ(ierr);
398 
399   /* store nonzero values */
400   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa);
401   count = 0;
402   for ( i=0; i<a->mbs; i++ ) {
403     for ( j=0; j<bs; j++ ) {
404       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
405         for ( l=0; l<bs; l++ ) {
406           aa[count++] = a->a[bs2*k + l*bs + j];
407         }
408       }
409     }
410   }
411   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
412   ierr = PetscFree(aa);CHKERRQ(ierr);
413 
414   ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
415   if (file) {
416     fprintf(file,"-matload_block_size %d\n",a->bs);
417   }
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNC__
422 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
423 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
424 {
425   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
426   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
427   char        *outputname;
428 
429   PetscFunctionBegin;
430   ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr);
431   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
432   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
433     ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
434   } else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
435     SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported");
436   } else if (format == VIEWER_FORMAT_ASCII_COMMON) {
437     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
438     for ( i=0; i<a->mbs; i++ ) {
439       for ( j=0; j<bs; j++ ) {
440         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
441         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
442           for ( l=0; l<bs; l++ ) {
443 #if defined(PETSC_USE_COMPLEX)
444             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
445               ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
446                       PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
447             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
448               ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
449                       PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
450             } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
451               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
452             }
453 #else
454             if (a->a[bs2*k + l*bs + j] != 0.0) {
455               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
456             }
457 #endif
458           }
459         }
460         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
461       }
462     }
463     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
464   } else {
465     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
466     for ( i=0; i<a->mbs; i++ ) {
467       for ( j=0; j<bs; j++ ) {
468         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
469         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
470           for ( l=0; l<bs; l++ ) {
471 #if defined(PETSC_USE_COMPLEX)
472             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) {
473               ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
474                 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
475             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) {
476               ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
477                 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
478             } else {
479               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
480             }
481 #else
482             ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
483 #endif
484           }
485         }
486         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
487       }
488     }
489     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
490   }
491   ierr = ViewerFlush(viewer);CHKERRQ(ierr);
492   PetscFunctionReturn(0);
493 }
494 
495 #undef __FUNC__
496 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom"
497 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa)
498 {
499   Mat          A = (Mat) Aa;
500   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
501   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
502   double       xl,yl,xr,yr,x_l,x_r,y_l,y_r;
503   MatScalar    *aa;
504   MPI_Comm     comm;
505   Viewer       viewer;
506 
507   PetscFunctionBegin;
508   /*
509       This is nasty. If this is called from an originally parallel matrix
510    then all processes call this, but only the first has the matrix so the
511    rest should return immediately.
512   */
513   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
514   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
515   if (rank) PetscFunctionReturn(0);
516 
517   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr);
518 
519   ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
520 
521   /* loop over matrix elements drawing boxes */
522   color = DRAW_BLUE;
523   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
524     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
525       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
526       x_l = a->j[j]*bs; x_r = x_l + 1.0;
527       aa = a->a + j*bs2;
528       for ( k=0; k<bs; k++ ) {
529         for ( l=0; l<bs; l++ ) {
530           if (PetscReal(*aa++) >=  0.) continue;
531           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
532         }
533       }
534     }
535   }
536   color = DRAW_CYAN;
537   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
538     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
539       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
540       x_l = a->j[j]*bs; x_r = x_l + 1.0;
541       aa = a->a + j*bs2;
542       for ( k=0; k<bs; k++ ) {
543         for ( l=0; l<bs; l++ ) {
544           if (PetscReal(*aa++) != 0.) continue;
545           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
546         }
547       }
548     }
549   }
550 
551   color = DRAW_RED;
552   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
553     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
554       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
555       x_l = a->j[j]*bs; x_r = x_l + 1.0;
556       aa = a->a + j*bs2;
557       for ( k=0; k<bs; k++ ) {
558         for ( l=0; l<bs; l++ ) {
559           if (PetscReal(*aa++) <= 0.) continue;
560           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
561         }
562       }
563     }
564   }
565   PetscFunctionReturn(0);
566 }
567 
568 #undef __FUNC__
569 #define __FUNC__ "MatView_SeqBAIJ_Draw"
570 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
571 {
572   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
573   int          ierr;
574   double       xl,yl,xr,yr,w,h;
575   Draw         draw;
576   PetscTruth   isnull;
577 
578   PetscFunctionBegin;
579 
580   ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
581   ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
582 
583   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
584   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
585   xr += w;    yr += h;  xl = -w;     yl = -h;
586   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
587   ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
588   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
589   PetscFunctionReturn(0);
590 }
591 
592 #undef __FUNC__
593 #define __FUNC__ "MatView_SeqBAIJ"
594 int MatView_SeqBAIJ(Mat A,Viewer viewer)
595 {
596   int        ierr;
597   PetscTruth issocket,isascii,isbinary,isdraw;
598 
599   PetscFunctionBegin;
600   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
601   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
602   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
603   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
604   if (issocket) {
605     SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported");
606   } else if (isascii){
607     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
608   } else if (isbinary) {
609     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
610   } else if (isdraw) {
611     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
612   } else {
613     SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
614   }
615   PetscFunctionReturn(0);
616 }
617 
618 
619 #undef __FUNC__
620 #define __FUNC__ "MatGetValues_SeqBAIJ"
621 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
622 {
623   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
624   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
625   int        *ai = a->i, *ailen = a->ilen;
626   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
627   MatScalar  *ap, *aa = a->a, zero = 0.0;
628 
629   PetscFunctionBegin;
630   for ( k=0; k<m; k++ ) { /* loop over rows */
631     row  = im[k]; brow = row/bs;
632     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
633     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
634     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
635     nrow = ailen[brow];
636     for ( l=0; l<n; l++ ) { /* loop over columns */
637       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
638       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
639       col  = in[l] ;
640       bcol = col/bs;
641       cidx = col%bs;
642       ridx = row%bs;
643       high = nrow;
644       low  = 0; /* assume unsorted */
645       while (high-low > 5) {
646         t = (low+high)/2;
647         if (rp[t] > bcol) high = t;
648         else             low  = t;
649       }
650       for ( i=low; i<high; i++ ) {
651         if (rp[i] > bcol) break;
652         if (rp[i] == bcol) {
653           *v++ = ap[bs2*i+bs*cidx+ridx];
654           goto finished;
655         }
656       }
657       *v++ = zero;
658       finished:;
659     }
660   }
661   PetscFunctionReturn(0);
662 }
663 
664 
665 #undef __FUNC__
666 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
667 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
668 {
669   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
670   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
671   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented;
672   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
673   Scalar      *value = v;
674   MatScalar   *ap,*aa=a->a,*bap;
675 
676   PetscFunctionBegin;
677   if (roworiented) {
678     stepval = (n-1)*bs;
679   } else {
680     stepval = (m-1)*bs;
681   }
682   for ( k=0; k<m; k++ ) { /* loop over added rows */
683     row  = im[k];
684     if (row < 0) continue;
685 #if defined(PETSC_USE_BOPT_g)
686     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
687 #endif
688     rp   = aj + ai[row];
689     ap   = aa + bs2*ai[row];
690     rmax = imax[row];
691     nrow = ailen[row];
692     low  = 0;
693     for ( l=0; l<n; l++ ) { /* loop over added columns */
694       if (in[l] < 0) continue;
695 #if defined(PETSC_USE_BOPT_g)
696       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
697 #endif
698       col = in[l];
699       if (roworiented) {
700         value = v + k*(stepval+bs)*bs + l*bs;
701       } else {
702         value = v + l*(stepval+bs)*bs + k*bs;
703       }
704       if (!sorted) low = 0; high = nrow;
705       while (high-low > 7) {
706         t = (low+high)/2;
707         if (rp[t] > col) high = t;
708         else             low  = t;
709       }
710       for ( i=low; i<high; i++ ) {
711         if (rp[i] > col) break;
712         if (rp[i] == col) {
713           bap  = ap +  bs2*i;
714           if (roworiented) {
715             if (is == ADD_VALUES) {
716               for ( ii=0; ii<bs; ii++,value+=stepval ) {
717                 for (jj=ii; jj<bs2; jj+=bs ) {
718                   bap[jj] += *value++;
719                 }
720               }
721             } else {
722               for ( ii=0; ii<bs; ii++,value+=stepval ) {
723                 for (jj=ii; jj<bs2; jj+=bs ) {
724                   bap[jj] = *value++;
725                 }
726               }
727             }
728           } else {
729             if (is == ADD_VALUES) {
730               for ( ii=0; ii<bs; ii++,value+=stepval ) {
731                 for (jj=0; jj<bs; jj++ ) {
732                   *bap++ += *value++;
733                 }
734               }
735             } else {
736               for ( ii=0; ii<bs; ii++,value+=stepval ) {
737                 for (jj=0; jj<bs; jj++ ) {
738                   *bap++  = *value++;
739                 }
740               }
741             }
742           }
743           goto noinsert2;
744         }
745       }
746       if (nonew == 1) goto noinsert2;
747       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
748       if (nrow >= rmax) {
749         /* there is no extra room in row, therefore enlarge */
750         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
751         MatScalar *new_a;
752 
753         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
754 
755         /* malloc new storage space */
756         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
757         new_a   = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a);
758         new_j   = (int *) (new_a + bs2*new_nz);
759         new_i   = new_j + new_nz;
760 
761         /* copy over old data into new slots */
762         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
763         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
764         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
765         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
766         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
767         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
768         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
769         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
770         /* free up old matrix storage */
771         ierr = PetscFree(a->a);CHKERRQ(ierr);
772         if (!a->singlemalloc) {
773           ierr = PetscFree(a->i);CHKERRQ(ierr);
774           ierr = PetscFree(a->j);CHKERRQ(ierr);
775         }
776         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
777         a->singlemalloc = 1;
778 
779         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
780         rmax = imax[row] = imax[row] + CHUNKSIZE;
781         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
782         a->maxnz += bs2*CHUNKSIZE;
783         a->reallocs++;
784         a->nz++;
785       }
786       N = nrow++ - 1;
787       /* shift up all the later entries in this row */
788       for ( ii=N; ii>=i; ii-- ) {
789         rp[ii+1] = rp[ii];
790         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
791       }
792       if (N >= i) {
793         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
794       }
795       rp[i] = col;
796       bap   = ap +  bs2*i;
797       if (roworiented) {
798         for ( ii=0; ii<bs; ii++,value+=stepval) {
799           for (jj=ii; jj<bs2; jj+=bs ) {
800             bap[jj] = *value++;
801           }
802         }
803       } else {
804         for ( ii=0; ii<bs; ii++,value+=stepval ) {
805           for (jj=0; jj<bs; jj++ ) {
806             *bap++  = *value++;
807           }
808         }
809       }
810       noinsert2:;
811       low = i;
812     }
813     ailen[row] = nrow;
814   }
815   PetscFunctionReturn(0);
816 }
817 
818 
819 #undef __FUNC__
820 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
821 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
822 {
823   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
824   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
825   int        m = a->m,*ip, N, *ailen = a->ilen;
826   int        mbs = a->mbs, bs2 = a->bs2,rmax = 0,ierr;
827   MatScalar  *aa = a->a, *ap;
828 
829   PetscFunctionBegin;
830   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
831 
832   if (m) rmax = ailen[0];
833   for ( i=1; i<mbs; i++ ) {
834     /* move each row back by the amount of empty slots (fshift) before it*/
835     fshift += imax[i-1] - ailen[i-1];
836     rmax   = PetscMax(rmax,ailen[i]);
837     if (fshift) {
838       ip = aj + ai[i]; ap = aa + bs2*ai[i];
839       N = ailen[i];
840       for ( j=0; j<N; j++ ) {
841         ip[j-fshift] = ip[j];
842         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
843       }
844     }
845     ai[i] = ai[i-1] + ailen[i-1];
846   }
847   if (mbs) {
848     fshift += imax[mbs-1] - ailen[mbs-1];
849     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
850   }
851   /* reset ilen and imax for each row */
852   for ( i=0; i<mbs; i++ ) {
853     ailen[i] = imax[i] = ai[i+1] - ai[i];
854   }
855   a->nz = ai[mbs];
856 
857   /* diagonals may have moved, so kill the diagonal pointers */
858   if (fshift && a->diag) {
859     ierr = PetscFree(a->diag);CHKERRQ(ierr);
860     PLogObjectMemory(A,-(m+1)*sizeof(int));
861     a->diag = 0;
862   }
863   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
864            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
865   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
866            a->reallocs);
867   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
868   a->reallocs          = 0;
869   A->info.nz_unneeded  = (double)fshift*bs2;
870 
871   PetscFunctionReturn(0);
872 }
873 
874 
875 
876 /*
877    This function returns an array of flags which indicate the locations of contiguous
878    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
879    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
880    Assume: sizes should be long enough to hold all the values.
881 */
882 #undef __FUNC__
883 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks"
884 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
885 {
886   int        i,j,k,row;
887   PetscTruth flg;
888 
889   PetscFunctionBegin;
890   for ( i=0,j=0; i<n; j++ ) {
891     row = idx[i];
892     if (row%bs!=0) { /* Not the begining of a block */
893       sizes[j] = 1;
894       i++;
895     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
896       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
897       i++;
898     } else { /* Begining of the block, so check if the complete block exists */
899       flg = PETSC_TRUE;
900       for ( k=1; k<bs; k++ ) {
901         if (row+k != idx[i+k]) { /* break in the block */
902           flg = PETSC_FALSE;
903           break;
904         }
905       }
906       if (flg == PETSC_TRUE) { /* No break in the bs */
907         sizes[j] = bs;
908         i+= bs;
909       } else {
910         sizes[j] = 1;
911         i++;
912       }
913     }
914   }
915   *bs_max = j;
916   PetscFunctionReturn(0);
917 }
918 
919 #undef __FUNC__
920 #define __FUNC__ "MatZeroRows_SeqBAIJ"
921 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
922 {
923   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
924   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
925   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
926   Scalar      zero = 0.0;
927   MatScalar   *aa;
928 
929   PetscFunctionBegin;
930   /* Make a copy of the IS and  sort it */
931   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
932   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
933 
934   /* allocate memory for rows,sizes */
935   rows  = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows);
936   sizes = rows + is_n;
937 
938   /* initialize copy IS valurs to rows, and sort them */
939   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
940   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
941   ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
942   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
943 
944   for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) {
945     row   = rows[j];
946     if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row);
947     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
948     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
949     if (sizes[i] == bs) {
950       if (diag) {
951         if (baij->ilen[row/bs] > 0) {
952           baij->ilen[row/bs] = 1;
953           baij->j[baij->i[row/bs]] = row/bs;
954           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
955         }
956         /* Now insert all the diagoanl values for this bs */
957         for ( k=0; k<bs; k++ ) {
958           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
959         }
960       } else { /* (!diag) */
961         baij->ilen[row/bs] = 0;
962       } /* end (!diag) */
963     } else { /* (sizes[i] != bs) */
964 #if defined (PETSC_USE_DEBUG)
965       if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1");
966 #endif
967       for ( k=0; k<count; k++ ) {
968         aa[0] = zero;
969         aa+=bs;
970       }
971       if (diag) {
972         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
973       }
974     }
975   }
976 
977   ierr = PetscFree(rows);CHKERRQ(ierr);
978   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
979   PetscFunctionReturn(0);
980 }
981 
982 #undef __FUNC__
983 #define __FUNC__ "MatSetValues_SeqBAIJ"
984 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
985 {
986   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
987   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
988   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
989   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
990   int         ridx,cidx,bs2=a->bs2,ierr;
991   MatScalar   *ap,value,*aa=a->a,*bap;
992 
993   PetscFunctionBegin;
994   for ( k=0; k<m; k++ ) { /* loop over added rows */
995     row  = im[k]; brow = row/bs;
996     if (row < 0) continue;
997 #if defined(PETSC_USE_BOPT_g)
998     if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m);
999 #endif
1000     rp   = aj + ai[brow];
1001     ap   = aa + bs2*ai[brow];
1002     rmax = imax[brow];
1003     nrow = ailen[brow];
1004     low  = 0;
1005     for ( l=0; l<n; l++ ) { /* loop over added columns */
1006       if (in[l] < 0) continue;
1007 #if defined(PETSC_USE_BOPT_g)
1008       if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n);
1009 #endif
1010       col = in[l]; bcol = col/bs;
1011       ridx = row % bs; cidx = col % bs;
1012       if (roworiented) {
1013         value = v[l + k*n];
1014       } else {
1015         value = v[k + l*m];
1016       }
1017       if (!sorted) low = 0; high = nrow;
1018       while (high-low > 7) {
1019         t = (low+high)/2;
1020         if (rp[t] > bcol) high = t;
1021         else              low  = t;
1022       }
1023       for ( i=low; i<high; i++ ) {
1024         if (rp[i] > bcol) break;
1025         if (rp[i] == bcol) {
1026           bap  = ap +  bs2*i + bs*cidx + ridx;
1027           if (is == ADD_VALUES) *bap += value;
1028           else                  *bap  = value;
1029           goto noinsert1;
1030         }
1031       }
1032       if (nonew == 1) goto noinsert1;
1033       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1034       if (nrow >= rmax) {
1035         /* there is no extra room in row, therefore enlarge */
1036         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1037         MatScalar *new_a;
1038 
1039         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1040 
1041         /* Malloc new storage space */
1042         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1043         new_a   = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a);
1044         new_j   = (int *) (new_a + bs2*new_nz);
1045         new_i   = new_j + new_nz;
1046 
1047         /* copy over old data into new slots */
1048         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
1049         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1050         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1051         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1052         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1053         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1054         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1055         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1056         /* free up old matrix storage */
1057         ierr = PetscFree(a->a);CHKERRQ(ierr);
1058         if (!a->singlemalloc) {
1059           ierr = PetscFree(a->i);CHKERRQ(ierr);
1060           ierr = PetscFree(a->j);CHKERRQ(ierr);
1061         }
1062         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1063         a->singlemalloc = 1;
1064 
1065         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1066         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1067         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1068         a->maxnz += bs2*CHUNKSIZE;
1069         a->reallocs++;
1070         a->nz++;
1071       }
1072       N = nrow++ - 1;
1073       /* shift up all the later entries in this row */
1074       for ( ii=N; ii>=i; ii-- ) {
1075         rp[ii+1] = rp[ii];
1076         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1077       }
1078       if (N>=i) {
1079         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1080       }
1081       rp[i]                      = bcol;
1082       ap[bs2*i + bs*cidx + ridx] = value;
1083       noinsert1:;
1084       low = i;
1085     }
1086     ailen[brow] = nrow;
1087   }
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1092 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1093 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1094 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1095 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1096 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec);
1097 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1098 extern int MatScale_SeqBAIJ(Scalar*,Mat);
1099 extern int MatNorm_SeqBAIJ(Mat,NormType,double *);
1100 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*);
1101 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1102 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1103 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1104 extern int MatZeroEntries_SeqBAIJ(Mat);
1105 
1106 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1107 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1108 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1109 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1110 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1111 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1112 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
1113 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1114 extern int MatSolveTrans_SeqBAIJ_7(Mat,Vec,Vec);
1115 extern int MatSolveTrans_SeqBAIJ_6(Mat,Vec,Vec);
1116 extern int MatSolveTrans_SeqBAIJ_5(Mat,Vec,Vec);
1117 extern int MatSolveTrans_SeqBAIJ_4(Mat,Vec,Vec);
1118 extern int MatSolveTrans_SeqBAIJ_3(Mat,Vec,Vec);
1119 extern int MatSolveTrans_SeqBAIJ_2(Mat,Vec,Vec);
1120 extern int MatSolveTrans_SeqBAIJ_1(Mat,Vec,Vec);
1121 
1122 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1123 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1124 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1125 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1126 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1127 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1128 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*);
1129 
1130 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1131 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1132 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1133 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1134 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1135 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec);
1136 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1137 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1138 
1139 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1140 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1141 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1142 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1143 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1144 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
1145 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1146 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1147 
1148 #undef __FUNC__
1149 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1150 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1151 {
1152   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1153   Mat         outA;
1154   int         ierr;
1155   PetscTruth  row_identity, col_identity;
1156 
1157   PetscFunctionBegin;
1158   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU");
1159   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1160   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1161   if (!row_identity || !col_identity) {
1162     SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU");
1163   }
1164 
1165   outA          = inA;
1166   inA->factor   = FACTOR_LU;
1167 
1168   if (!a->diag) {
1169     ierr = MatMarkDiag_SeqBAIJ(inA);CHKERRQ(ierr);
1170   }
1171   /*
1172       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1173       for ILU(0) factorization with natural ordering
1174   */
1175   switch (a->bs) {
1176   case 1:
1177     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_2_NaturalOrdering;
1178     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1179   case 2:
1180     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1181     inA->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1182     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_2_NaturalOrdering;
1183     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1184     break;
1185   case 3:
1186     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1187     inA->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1188     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_3_NaturalOrdering;
1189     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1190     break;
1191   case 4:
1192     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1193     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1194     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_4_NaturalOrdering;
1195     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1196     break;
1197   case 5:
1198     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1199     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1200     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_5_NaturalOrdering;
1201     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1202     break;
1203   case 6:
1204     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1205     inA->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1206     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_6_NaturalOrdering;
1207     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1208     break;
1209   case 7:
1210     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1211     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_7_NaturalOrdering;
1212     inA->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1213     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1214     break;
1215   default:
1216     a->row        = row;
1217     a->col        = col;
1218     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1219     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1220 
1221     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1222     ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr);
1223     PLogObjectParent(inA,a->icol);
1224 
1225     if (!a->solve_work) {
1226       a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1227       PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1228     }
1229   }
1230 
1231   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1232 
1233   PetscFunctionReturn(0);
1234 }
1235 #undef __FUNC__
1236 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1237 int MatPrintHelp_SeqBAIJ(Mat A)
1238 {
1239   static PetscTruth called = PETSC_FALSE;
1240   MPI_Comm          comm = A->comm;
1241   int               ierr;
1242 
1243   PetscFunctionBegin;
1244   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1245   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1246   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 EXTERN_C_BEGIN
1251 #undef __FUNC__
1252 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1253 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1254 {
1255   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1256   int         i,nz,n;
1257 
1258   PetscFunctionBegin;
1259   nz = baij->maxnz;
1260   n  = baij->n;
1261   for (i=0; i<nz; i++) {
1262     baij->j[i] = indices[i];
1263   }
1264   baij->nz = nz;
1265   for ( i=0; i<n; i++ ) {
1266     baij->ilen[i] = baij->imax[i];
1267   }
1268 
1269   PetscFunctionReturn(0);
1270 }
1271 EXTERN_C_END
1272 
1273 #undef __FUNC__
1274 #define __FUNC__ "MatSeqBAIJSetColumnIndices"
1275 /*@
1276     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1277        in the matrix.
1278 
1279   Input Parameters:
1280 +  mat - the SeqBAIJ matrix
1281 -  indices - the column indices
1282 
1283   Level: advanced
1284 
1285   Notes:
1286     This can be called if you have precomputed the nonzero structure of the
1287   matrix and want to provide it to the matrix object to improve the performance
1288   of the MatSetValues() operation.
1289 
1290     You MUST have set the correct numbers of nonzeros per row in the call to
1291   MatCreateSeqBAIJ().
1292 
1293     MUST be called before any calls to MatSetValues();
1294 
1295 @*/
1296 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1297 {
1298   int ierr,(*f)(Mat,int *);
1299 
1300   PetscFunctionBegin;
1301   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1302   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr);
1303   if (f) {
1304     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1305   } else {
1306     SETERRQ(1,1,"Wrong type of matrix to set column indices");
1307   }
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 /* -------------------------------------------------------------------*/
1312 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1313        MatGetRow_SeqBAIJ,
1314        MatRestoreRow_SeqBAIJ,
1315        MatMult_SeqBAIJ_N,
1316        MatMultAdd_SeqBAIJ_N,
1317        MatMultTrans_SeqBAIJ,
1318        MatMultTransAdd_SeqBAIJ,
1319        MatSolve_SeqBAIJ_N,
1320        0,
1321        0,
1322        0,
1323        MatLUFactor_SeqBAIJ,
1324        0,
1325        0,
1326        MatTranspose_SeqBAIJ,
1327        MatGetInfo_SeqBAIJ,
1328        MatEqual_SeqBAIJ,
1329        MatGetDiagonal_SeqBAIJ,
1330        MatDiagonalScale_SeqBAIJ,
1331        MatNorm_SeqBAIJ,
1332        0,
1333        MatAssemblyEnd_SeqBAIJ,
1334        0,
1335        MatSetOption_SeqBAIJ,
1336        MatZeroEntries_SeqBAIJ,
1337        MatZeroRows_SeqBAIJ,
1338        MatLUFactorSymbolic_SeqBAIJ,
1339        MatLUFactorNumeric_SeqBAIJ_N,
1340        0,
1341        0,
1342        MatGetSize_SeqBAIJ,
1343        MatGetSize_SeqBAIJ,
1344        MatGetOwnershipRange_SeqBAIJ,
1345        MatILUFactorSymbolic_SeqBAIJ,
1346        0,
1347        0,
1348        0,
1349        MatDuplicate_SeqBAIJ,
1350        0,
1351        0,
1352        MatILUFactor_SeqBAIJ,
1353        0,
1354        0,
1355        MatGetSubMatrices_SeqBAIJ,
1356        MatIncreaseOverlap_SeqBAIJ,
1357        MatGetValues_SeqBAIJ,
1358        0,
1359        MatPrintHelp_SeqBAIJ,
1360        MatScale_SeqBAIJ,
1361        0,
1362        0,
1363        0,
1364        MatGetBlockSize_SeqBAIJ,
1365        MatGetRowIJ_SeqBAIJ,
1366        MatRestoreRowIJ_SeqBAIJ,
1367        0,
1368        0,
1369        0,
1370        0,
1371        0,
1372        0,
1373        MatSetValuesBlocked_SeqBAIJ,
1374        MatGetSubMatrix_SeqBAIJ,
1375        0,
1376        0,
1377        MatGetMaps_Petsc};
1378 
1379 EXTERN_C_BEGIN
1380 #undef __FUNC__
1381 #define __FUNC__ "MatStoreValues_SeqBAIJ"
1382 int MatStoreValues_SeqBAIJ(Mat mat)
1383 {
1384   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1385   int         nz = aij->i[aij->m]*aij->bs*aij->bs2;
1386   int         ierr;
1387 
1388   PetscFunctionBegin;
1389   if (aij->nonew != 1) {
1390     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1391   }
1392 
1393   /* allocate space for values if not already there */
1394   if (!aij->saved_values) {
1395     aij->saved_values = (Scalar *) PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values);
1396   }
1397 
1398   /* copy values over */
1399   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1400   PetscFunctionReturn(0);
1401 }
1402 EXTERN_C_END
1403 
1404 EXTERN_C_BEGIN
1405 #undef __FUNC__
1406 #define __FUNC__ "MatRetrieveValues_SeqBAIJ"
1407 int MatRetrieveValues_SeqBAIJ(Mat mat)
1408 {
1409   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1410   int         nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr;
1411 
1412   PetscFunctionBegin;
1413   if (aij->nonew != 1) {
1414     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1415   }
1416   if (!aij->saved_values) {
1417     SETERRQ(1,1,"Must call MatStoreValues(A);first");
1418   }
1419 
1420   /* copy values over */
1421   ierr = PetscMemcpy(aij->a, aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1422   PetscFunctionReturn(0);
1423 }
1424 EXTERN_C_END
1425 
1426 #undef __FUNC__
1427 #define __FUNC__ "MatCreateSeqBAIJ"
1428 /*@C
1429    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1430    compressed row) format.  For good matrix assembly performance the
1431    user should preallocate the matrix storage by setting the parameter nz
1432    (or the array nnz).  By setting these parameters accurately, performance
1433    during matrix assembly can be increased by more than a factor of 50.
1434 
1435    Collective on MPI_Comm
1436 
1437    Input Parameters:
1438 +  comm - MPI communicator, set to PETSC_COMM_SELF
1439 .  bs - size of block
1440 .  m - number of rows
1441 .  n - number of columns
1442 .  nz - number of block nonzeros per block row (same for all rows)
1443 -  nnz - array containing the number of block nonzeros in the various block rows
1444          (possibly different for each block row) or PETSC_NULL
1445 
1446    Output Parameter:
1447 .  A - the matrix
1448 
1449    Options Database Keys:
1450 .   -mat_no_unroll - uses code that does not unroll the loops in the
1451                      block calculations (much slower)
1452 .    -mat_block_size - size of the blocks to use
1453 
1454    Level: intermediate
1455 
1456    Notes:
1457    The block AIJ format is fully compatible with standard Fortran 77
1458    storage.  That is, the stored row and column indices can begin at
1459    either one (as in Fortran) or zero.  See the users' manual for details.
1460 
1461    Specify the preallocated storage with either nz or nnz (not both).
1462    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1463    allocation.  For additional details, see the users manual chapter on
1464    matrices.
1465 
1466 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1467 @*/
1468 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1469 {
1470   Mat         B;
1471   Mat_SeqBAIJ *b;
1472   int         i,len,ierr,mbs,nbs,bs2,size;
1473   PetscTruth  flg;
1474 
1475   PetscFunctionBegin;
1476   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1477   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1");
1478 
1479   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1480   mbs  = m/bs;
1481   nbs  = n/bs;
1482   bs2  = bs*bs;
1483 
1484   if (mbs*bs!=m || nbs*bs!=n) {
1485     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize");
1486   }
1487 
1488   if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz);
1489   if (nnz) {
1490     for (i=0; i<mbs; i++) {
1491       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1492     }
1493   }
1494 
1495   *A = 0;
1496   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView);
1497   PLogObjectCreate(B);
1498   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b);
1499   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1500   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1501   ierr    = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1502   if (!flg) {
1503     switch (bs) {
1504     case 1:
1505       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1506       B->ops->solve           = MatSolve_SeqBAIJ_1;
1507       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_1;
1508       B->ops->mult            = MatMult_SeqBAIJ_1;
1509       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1510       break;
1511     case 2:
1512       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1513       B->ops->solve           = MatSolve_SeqBAIJ_2;
1514       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_2;
1515       B->ops->mult            = MatMult_SeqBAIJ_2;
1516       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1517       break;
1518     case 3:
1519       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1520       B->ops->solve           = MatSolve_SeqBAIJ_3;
1521       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_3;
1522       B->ops->mult            = MatMult_SeqBAIJ_3;
1523       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1524       break;
1525     case 4:
1526       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1527       B->ops->solve           = MatSolve_SeqBAIJ_4;
1528       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_4;
1529       B->ops->mult            = MatMult_SeqBAIJ_4;
1530       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1531       break;
1532     case 5:
1533       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1534       B->ops->solve           = MatSolve_SeqBAIJ_5;
1535       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_5;
1536       B->ops->mult            = MatMult_SeqBAIJ_5;
1537       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1538       break;
1539     case 6:
1540       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1541       B->ops->solve           = MatSolve_SeqBAIJ_6;
1542       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_6;
1543       B->ops->mult            = MatMult_SeqBAIJ_6;
1544       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1545       break;
1546     case 7:
1547       B->ops->mult            = MatMult_SeqBAIJ_7;
1548       B->ops->solve           = MatSolve_SeqBAIJ_7;
1549       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_7;
1550       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1551       break;
1552     }
1553   }
1554   B->ops->destroy     = MatDestroy_SeqBAIJ;
1555   B->ops->view        = MatView_SeqBAIJ;
1556   B->factor           = 0;
1557   B->lupivotthreshold = 1.0;
1558   B->mapping          = 0;
1559   b->row              = 0;
1560   b->col              = 0;
1561   b->icol             = 0;
1562   b->reallocs         = 0;
1563   b->saved_values     = 0;
1564 
1565   b->m       = m; B->m = m; B->M = m;
1566   b->n       = n; B->n = n; B->N = n;
1567 
1568   ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr);
1569   ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr);
1570 
1571   b->mbs     = mbs;
1572   b->nbs     = nbs;
1573   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(b->imax);
1574   if (nnz == PETSC_NULL) {
1575     if (nz == PETSC_DEFAULT) nz = 5;
1576     else if (nz <= 0)        nz = 1;
1577     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1578     nz = nz*mbs;
1579   } else {
1580     nz = 0;
1581     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1582   }
1583 
1584   /* allocate the matrix space */
1585   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int);
1586   b->a  = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a);
1587   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1588   b->j  = (int *) (b->a + nz*bs2);
1589   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1590   b->i  = b->j + nz;
1591   b->singlemalloc = 1;
1592 
1593   b->i[0] = 0;
1594   for (i=1; i<mbs+1; i++) {
1595     b->i[i] = b->i[i-1] + b->imax[i-1];
1596   }
1597 
1598   /* b->ilen will count nonzeros in each block row so far. */
1599   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1600   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1601   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1602 
1603   b->bs               = bs;
1604   b->bs2              = bs2;
1605   b->mbs              = mbs;
1606   b->nz               = 0;
1607   b->maxnz            = nz*bs2;
1608   b->sorted           = 0;
1609   b->roworiented      = 1;
1610   b->nonew            = 0;
1611   b->diag             = 0;
1612   b->solve_work       = 0;
1613   b->mult_work        = 0;
1614   b->spptr            = 0;
1615   B->info.nz_unneeded = (double)b->maxnz;
1616 
1617   *A = B;
1618   ierr = OptionsHasName(PETSC_NULL,"-help", &flg);CHKERRQ(ierr);
1619   if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); }
1620 
1621   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1622                                      "MatStoreValues_SeqBAIJ",
1623                                      (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1624   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1625                                      "MatRetrieveValues_SeqBAIJ",
1626                                      (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1627   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1628                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1629                                      (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1630   PetscFunctionReturn(0);
1631 }
1632 
1633 #undef __FUNC__
1634 #define __FUNC__ "MatDuplicate_SeqBAIJ"
1635 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1636 {
1637   Mat         C;
1638   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1639   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1640 
1641   PetscFunctionBegin;
1642   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix");
1643 
1644   *B = 0;
1645   PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView);
1646   PLogObjectCreate(C);
1647   C->data         = (void *) (c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c);
1648   ierr            = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1649   C->ops->destroy = MatDestroy_SeqBAIJ;
1650   C->ops->view    = MatView_SeqBAIJ;
1651   C->factor       = A->factor;
1652   c->row          = 0;
1653   c->col          = 0;
1654   c->icol         = 0;
1655   c->saved_values = 0;
1656   C->assembled    = PETSC_TRUE;
1657 
1658   c->m = C->m   = a->m;
1659   c->n = C->n   = a->n;
1660   C->M          = a->m;
1661   C->N          = a->n;
1662 
1663   c->bs         = a->bs;
1664   c->bs2        = a->bs2;
1665   c->mbs        = a->mbs;
1666   c->nbs        = a->nbs;
1667 
1668   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax);
1669   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen);
1670   for ( i=0; i<mbs; i++ ) {
1671     c->imax[i] = a->imax[i];
1672     c->ilen[i] = a->ilen[i];
1673   }
1674 
1675   /* allocate the matrix space */
1676   c->singlemalloc = 1;
1677   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1678   c->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(c->a);
1679   c->j = (int *) (c->a + nz*bs2);
1680   c->i = c->j + nz;
1681   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1682   if (mbs > 0) {
1683     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1684     if (cpvalues == MAT_COPY_VALUES) {
1685       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1686     } else {
1687       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1688     }
1689   }
1690 
1691   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1692   c->sorted      = a->sorted;
1693   c->roworiented = a->roworiented;
1694   c->nonew       = a->nonew;
1695 
1696   if (a->diag) {
1697     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(c->diag);
1698     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1699     for ( i=0; i<mbs; i++ ) {
1700       c->diag[i] = a->diag[i];
1701     }
1702   } else c->diag        = 0;
1703   c->nz                 = a->nz;
1704   c->maxnz              = a->maxnz;
1705   c->solve_work         = 0;
1706   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1707   c->mult_work          = 0;
1708   *B = C;
1709   ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 #undef __FUNC__
1714 #define __FUNC__ "MatLoad_SeqBAIJ"
1715 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1716 {
1717   Mat_SeqBAIJ  *a;
1718   Mat          B;
1719   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1720   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1721   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1722   int          *masked, nmask,tmp,bs2,ishift;
1723   Scalar       *aa;
1724   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1725 
1726   PetscFunctionBegin;
1727   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1728   bs2  = bs*bs;
1729 
1730   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1731   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor");
1732   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1733   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1734   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object");
1735   M = header[1]; N = header[2]; nz = header[3];
1736 
1737   if (header[3] < 0) {
1738     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ");
1739   }
1740 
1741   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
1742 
1743   /*
1744      This code adds extra rows to make sure the number of rows is
1745     divisible by the blocksize
1746   */
1747   mbs        = M/bs;
1748   extra_rows = bs - M + bs*(mbs);
1749   if (extra_rows == bs) extra_rows = 0;
1750   else                  mbs++;
1751   if (extra_rows) {
1752     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1753   }
1754 
1755   /* read in row lengths */
1756   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1757   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1758   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1759 
1760   /* read in column indices */
1761   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) );CHKPTRQ(jj);
1762   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1763   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1764 
1765   /* loop over row lengths determining block row lengths */
1766   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1767   ierr        = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1768   mask        = (int *) PetscMalloc( 2*mbs*sizeof(int) );CHKPTRQ(mask);
1769   ierr        = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1770   masked      = mask + mbs;
1771   rowcount    = 0; nzcount = 0;
1772   for ( i=0; i<mbs; i++ ) {
1773     nmask = 0;
1774     for ( j=0; j<bs; j++ ) {
1775       kmax = rowlengths[rowcount];
1776       for ( k=0; k<kmax; k++ ) {
1777         tmp = jj[nzcount++]/bs;
1778         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1779       }
1780       rowcount++;
1781     }
1782     browlengths[i] += nmask;
1783     /* zero out the mask elements we set */
1784     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1785   }
1786 
1787   /* create our matrix */
1788   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1789   B = *A;
1790   a = (Mat_SeqBAIJ *) B->data;
1791 
1792   /* set matrix "i" values */
1793   a->i[0] = 0;
1794   for ( i=1; i<= mbs; i++ ) {
1795     a->i[i]      = a->i[i-1] + browlengths[i-1];
1796     a->ilen[i-1] = browlengths[i-1];
1797   }
1798   a->nz         = 0;
1799   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1800 
1801   /* read in nonzero values */
1802   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1803   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1804   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1805 
1806   /* set "a" and "j" values into matrix */
1807   nzcount = 0; jcount = 0;
1808   for ( i=0; i<mbs; i++ ) {
1809     nzcountb = nzcount;
1810     nmask    = 0;
1811     for ( j=0; j<bs; j++ ) {
1812       kmax = rowlengths[i*bs+j];
1813       for ( k=0; k<kmax; k++ ) {
1814         tmp = jj[nzcount++]/bs;
1815 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1816       }
1817       rowcount++;
1818     }
1819     /* sort the masked values */
1820     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1821 
1822     /* set "j" values into matrix */
1823     maskcount = 1;
1824     for ( j=0; j<nmask; j++ ) {
1825       a->j[jcount++]  = masked[j];
1826       mask[masked[j]] = maskcount++;
1827     }
1828     /* set "a" values into matrix */
1829     ishift = bs2*a->i[i];
1830     for ( j=0; j<bs; j++ ) {
1831       kmax = rowlengths[i*bs+j];
1832       for ( k=0; k<kmax; k++ ) {
1833         tmp       = jj[nzcountb]/bs ;
1834         block     = mask[tmp] - 1;
1835         point     = jj[nzcountb] - bs*tmp;
1836         idx       = ishift + bs2*block + j + bs*point;
1837         a->a[idx] = aa[nzcountb++];
1838       }
1839     }
1840     /* zero out the mask elements we set */
1841     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1842   }
1843   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix");
1844 
1845   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1846   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1847   ierr = PetscFree(aa);CHKERRQ(ierr);
1848   ierr = PetscFree(jj);CHKERRQ(ierr);
1849   ierr = PetscFree(mask);CHKERRQ(ierr);
1850 
1851   B->assembled = PETSC_TRUE;
1852 
1853   ierr = MatView_Private(B);CHKERRQ(ierr);
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 
1858 
1859