xref: /petsc/src/mat/impls/baij/seq/baij.c (revision c4992f7d82684d6c144ad7e77a9672eae0c62a54)
1 /*$Id: baij.c,v 1.189 1999/11/10 03:19:27 bsmith Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "sys.h"
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNC__
18 #define __FUNC__ "MatMissingDiagonal_SeqBAIJ"
19 int MatMissingDiagonal_SeqBAIJ(Mat A)
20 {
21   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
22   int         *diag = a->diag, *jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
26   for ( i=0; i<a->mbs; i++ ) {
27     if (jj[diag[i]] != i) {
28       SETERRQ1(1,1,"Matrix is missing diagonal number %d",i);
29     }
30   }
31   PetscFunctionReturn(0);
32 }
33 
34 #undef __FUNC__
35 #define __FUNC__ "MatMarkDiagonal_SeqBAIJ"
36 int MatMarkDiagonal_SeqBAIJ(Mat A)
37 {
38   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
39   int         i,j, *diag, m = a->mbs;
40 
41   PetscFunctionBegin;
42   diag = (int *) PetscMalloc( (m+1)*sizeof(int));CHKPTRQ(diag);
43   PLogObjectMemory(A,(m+1)*sizeof(int));
44   for ( i=0; i<m; i++ ) {
45     diag[i] = a->i[i+1];
46     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
47       if (a->j[j] == i) {
48         diag[i] = j;
49         break;
50       }
51     }
52   }
53   a->diag = diag;
54   PetscFunctionReturn(0);
55 }
56 
57 
58 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
59 
60 #undef __FUNC__
61 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
62 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
63                             PetscTruth *done)
64 {
65   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
66   int         ierr, n = a->mbs,i;
67 
68   PetscFunctionBegin;
69   *nn = n;
70   if (!ia) PetscFunctionReturn(0);
71   if (symmetric) {
72     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
73   } else if (oshift == 1) {
74     /* temporarily add 1 to i and j indices */
75     int nz = a->i[n] + 1;
76     for ( i=0; i<nz; i++ ) a->j[i]++;
77     for ( i=0; i<n+1; i++ ) a->i[i]++;
78     *ia = a->i; *ja = a->j;
79   } else {
80     *ia = a->i; *ja = a->j;
81   }
82 
83   PetscFunctionReturn(0);
84 }
85 
86 #undef __FUNC__
87 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
88 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
89                                 PetscTruth *done)
90 {
91   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
92   int         i,n = a->mbs,ierr;
93 
94   PetscFunctionBegin;
95   if (!ia) PetscFunctionReturn(0);
96   if (symmetric) {
97     ierr = PetscFree(*ia);CHKERRQ(ierr);
98     ierr = PetscFree(*ja);CHKERRQ(ierr);
99   } else if (oshift == 1) {
100     int nz = a->i[n];
101     for ( i=0; i<nz; i++ ) a->j[i]--;
102     for ( i=0; i<n+1; i++ ) a->i[i]--;
103   }
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNC__
108 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
109 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
110 {
111   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
112 
113   PetscFunctionBegin;
114   *bs = baij->bs;
115   PetscFunctionReturn(0);
116 }
117 
118 
119 #undef __FUNC__
120 #define __FUNC__ "MatDestroy_SeqBAIJ"
121 int MatDestroy_SeqBAIJ(Mat A)
122 {
123   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
124   int         ierr;
125 
126   PetscFunctionBegin;
127   if (A->mapping) {
128     ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr);
129   }
130   if (A->bmapping) {
131     ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr);
132   }
133   if (A->rmap) {
134     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
135   }
136   if (A->cmap) {
137     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
138   }
139 #if defined(PETSC_USE_LOG)
140   PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
141 #endif
142   ierr = PetscFree(a->a);CHKERRQ(ierr);
143   if (!a->singlemalloc) {
144     ierr = PetscFree(a->i);CHKERRQ(ierr);
145     ierr = PetscFree(a->j);CHKERRQ(ierr);
146   }
147   if (a->row) {
148     ierr = ISDestroy(a->row);CHKERRQ(ierr);
149   }
150   if (a->col) {
151     ierr = ISDestroy(a->col);CHKERRQ(ierr);
152   }
153   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
154   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
155   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
156   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
157   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
158   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
159   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
160   ierr = PetscFree(a);CHKERRQ(ierr);
161   PLogObjectDestroy(A);
162   PetscHeaderDestroy(A);
163   PetscFunctionReturn(0);
164 }
165 
166 #undef __FUNC__
167 #define __FUNC__ "MatSetOption_SeqBAIJ"
168 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
169 {
170   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
171 
172   PetscFunctionBegin;
173   if      (op == MAT_ROW_ORIENTED)                 a->roworiented    = PETSC_TRUE;
174   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented    = PETSC_FALSE;
175   else if (op == MAT_COLUMNS_SORTED)               a->sorted         = PETSC_TRUE;
176   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted         = PETSC_FALSE;
177   else if (op == MAT_KEEP_ZEROED_ROWS)             a->keepzeroedrows = PETSC_TRUE;
178   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew          = 1;
179   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew          = -1;
180   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew          = -2;
181   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew          = 0;
182   else if (op == MAT_ROWS_SORTED ||
183            op == MAT_ROWS_UNSORTED ||
184            op == MAT_SYMMETRIC ||
185            op == MAT_STRUCTURALLY_SYMMETRIC ||
186            op == MAT_YES_NEW_DIAGONALS ||
187            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
188            op == MAT_USE_HASH_TABLE) {
189     PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
190   } else if (op == MAT_NO_NEW_DIAGONALS) {
191     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
192   } else {
193     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
194   }
195   PetscFunctionReturn(0);
196 }
197 
198 
199 #undef __FUNC__
200 #define __FUNC__ "MatGetSize_SeqBAIJ"
201 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
202 {
203   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
204 
205   PetscFunctionBegin;
206   if (m) *m = a->m;
207   if (n) *n = a->n;
208   PetscFunctionReturn(0);
209 }
210 
211 #undef __FUNC__
212 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
213 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
214 {
215   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
216 
217   PetscFunctionBegin;
218   *m = 0; *n = a->m;
219   PetscFunctionReturn(0);
220 }
221 
222 #undef __FUNC__
223 #define __FUNC__ "MatGetRow_SeqBAIJ"
224 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
225 {
226   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ *) A->data;
227   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
228   MatScalar    *aa,*aa_i;
229   Scalar       *v_i;
230 
231   PetscFunctionBegin;
232   bs  = a->bs;
233   ai  = a->i;
234   aj  = a->j;
235   aa  = a->a;
236   bs2 = a->bs2;
237 
238   if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range");
239 
240   bn  = row/bs;   /* Block number */
241   bp  = row % bs; /* Block Position */
242   M   = ai[bn+1] - ai[bn];
243   *nz = bs*M;
244 
245   if (v) {
246     *v = 0;
247     if (*nz) {
248       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) );CHKPTRQ(*v);
249       for ( i=0; i<M; i++ ) { /* for each block in the block row */
250         v_i  = *v + i*bs;
251         aa_i = aa + bs2*(ai[bn] + i);
252         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
253       }
254     }
255   }
256 
257   if (idx) {
258     *idx = 0;
259     if (*nz) {
260       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) );CHKPTRQ(*idx);
261       for ( i=0; i<M; i++ ) { /* for each block in the block row */
262         idx_i = *idx + i*bs;
263         itmp  = bs*aj[ai[bn] + i];
264         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNC__
272 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
273 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
274 {
275   int ierr;
276 
277   PetscFunctionBegin;
278   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
279   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNC__
284 #define __FUNC__ "MatTranspose_SeqBAIJ"
285 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
286 {
287   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
288   Mat         C;
289   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
290   int         *rows,*cols,bs2=a->bs2;
291   MatScalar   *array = a->a;
292 
293   PetscFunctionBegin;
294   if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place");
295   col  = (int *) PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col);
296   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
297 
298   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
299   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
300   ierr = PetscFree(col);CHKERRQ(ierr);
301   rows = (int *) PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows);
302   cols = rows + bs;
303   for ( i=0; i<mbs; i++ ) {
304     cols[0] = i*bs;
305     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
306     len = ai[i+1] - ai[i];
307     for ( j=0; j<len; j++ ) {
308       rows[0] = (*aj++)*bs;
309       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
310       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
311       array += bs2;
312     }
313   }
314   ierr = PetscFree(rows);CHKERRQ(ierr);
315 
316   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
317   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
318 
319   if (B) {
320     *B = C;
321   } else {
322     PetscOps *Abops;
323     MatOps   Aops;
324 
325     /* This isn't really an in-place transpose */
326     ierr = PetscFree(a->a);CHKERRQ(ierr);
327     if (!a->singlemalloc) {
328       ierr = PetscFree(a->i);CHKERRQ(ierr);
329       ierr = PetscFree(a->j);CHKERRQ(ierr);
330     }
331     if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
332     if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
333     if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
334     if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
335     ierr = PetscFree(a);CHKERRQ(ierr);
336 
337 
338     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
339     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
340 
341     /*
342        This is horrible, horrible code. We need to keep the
343       A pointers for the bops and ops but copy everything
344       else from C.
345     */
346     Abops    = A->bops;
347     Aops     = A->ops;
348     ierr     = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr);
349     A->bops  = Abops;
350     A->ops   = Aops;
351     A->qlist = 0;
352 
353     PetscHeaderDestroy(C);
354   }
355   PetscFunctionReturn(0);
356 }
357 
358 #undef __FUNC__
359 #define __FUNC__ "MatView_SeqBAIJ_Binary"
360 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
361 {
362   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
363   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
364   Scalar      *aa;
365   FILE        *file;
366 
367   PetscFunctionBegin;
368   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
369   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
370   col_lens[0] = MAT_COOKIE;
371 
372   col_lens[1] = a->m;
373   col_lens[2] = a->n;
374   col_lens[3] = a->nz*bs2;
375 
376   /* store lengths of each row and write (including header) to file */
377   count = 0;
378   for ( i=0; i<a->mbs; i++ ) {
379     for ( j=0; j<bs; j++ ) {
380       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
381     }
382   }
383   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr);
384   ierr = PetscFree(col_lens);CHKERRQ(ierr);
385 
386   /* store column indices (zero start index) */
387   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) );CHKPTRQ(jj);
388   count = 0;
389   for ( i=0; i<a->mbs; i++ ) {
390     for ( j=0; j<bs; j++ ) {
391       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
392         for ( l=0; l<bs; l++ ) {
393           jj[count++] = bs*a->j[k] + l;
394         }
395       }
396     }
397   }
398   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
399   ierr = PetscFree(jj);CHKERRQ(ierr);
400 
401   /* store nonzero values */
402   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa);
403   count = 0;
404   for ( i=0; i<a->mbs; i++ ) {
405     for ( j=0; j<bs; j++ ) {
406       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
407         for ( l=0; l<bs; l++ ) {
408           aa[count++] = a->a[bs2*k + l*bs + j];
409         }
410       }
411     }
412   }
413   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
414   ierr = PetscFree(aa);CHKERRQ(ierr);
415 
416   ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
417   if (file) {
418     fprintf(file,"-matload_block_size %d\n",a->bs);
419   }
420   PetscFunctionReturn(0);
421 }
422 
423 #undef __FUNC__
424 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
425 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
426 {
427   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
428   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
429   char        *outputname;
430 
431   PetscFunctionBegin;
432   ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr);
433   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
434   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
435     ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
436   } else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
437     SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported");
438   } else if (format == VIEWER_FORMAT_ASCII_COMMON) {
439     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
440     for ( i=0; i<a->mbs; i++ ) {
441       for ( j=0; j<bs; j++ ) {
442         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
443         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
444           for ( l=0; l<bs; l++ ) {
445 #if defined(PETSC_USE_COMPLEX)
446             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
447               ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
448                       PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
449             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
450               ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
451                       PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
452             } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
453               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
454             }
455 #else
456             if (a->a[bs2*k + l*bs + j] != 0.0) {
457               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
458             }
459 #endif
460           }
461         }
462         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
463       }
464     }
465     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
466   } else {
467     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
468     for ( i=0; i<a->mbs; i++ ) {
469       for ( j=0; j<bs; j++ ) {
470         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
471         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
472           for ( l=0; l<bs; l++ ) {
473 #if defined(PETSC_USE_COMPLEX)
474             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) {
475               ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
476                 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
477             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) {
478               ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
479                 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
480             } else {
481               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
482             }
483 #else
484             ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
485 #endif
486           }
487         }
488         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
489       }
490     }
491     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
492   }
493   ierr = ViewerFlush(viewer);CHKERRQ(ierr);
494   PetscFunctionReturn(0);
495 }
496 
497 #undef __FUNC__
498 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom"
499 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa)
500 {
501   Mat          A = (Mat) Aa;
502   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
503   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
504   double       xl,yl,xr,yr,x_l,x_r,y_l,y_r;
505   MatScalar    *aa;
506   MPI_Comm     comm;
507   Viewer       viewer;
508 
509   PetscFunctionBegin;
510   /*
511       This is nasty. If this is called from an originally parallel matrix
512    then all processes call this, but only the first has the matrix so the
513    rest should return immediately.
514   */
515   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
516   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
517   if (rank) PetscFunctionReturn(0);
518 
519   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr);
520 
521   ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
522 
523   /* loop over matrix elements drawing boxes */
524   color = DRAW_BLUE;
525   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
526     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
527       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
528       x_l = a->j[j]*bs; x_r = x_l + 1.0;
529       aa = a->a + j*bs2;
530       for ( k=0; k<bs; k++ ) {
531         for ( l=0; l<bs; l++ ) {
532           if (PetscReal(*aa++) >=  0.) continue;
533           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
534         }
535       }
536     }
537   }
538   color = DRAW_CYAN;
539   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
540     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
541       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
542       x_l = a->j[j]*bs; x_r = x_l + 1.0;
543       aa = a->a + j*bs2;
544       for ( k=0; k<bs; k++ ) {
545         for ( l=0; l<bs; l++ ) {
546           if (PetscReal(*aa++) != 0.) continue;
547           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
548         }
549       }
550     }
551   }
552 
553   color = DRAW_RED;
554   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
555     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
556       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
557       x_l = a->j[j]*bs; x_r = x_l + 1.0;
558       aa = a->a + j*bs2;
559       for ( k=0; k<bs; k++ ) {
560         for ( l=0; l<bs; l++ ) {
561           if (PetscReal(*aa++) <= 0.) continue;
562           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
563         }
564       }
565     }
566   }
567   PetscFunctionReturn(0);
568 }
569 
570 #undef __FUNC__
571 #define __FUNC__ "MatView_SeqBAIJ_Draw"
572 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
573 {
574   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
575   int          ierr;
576   double       xl,yl,xr,yr,w,h;
577   Draw         draw;
578   PetscTruth   isnull;
579 
580   PetscFunctionBegin;
581 
582   ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
583   ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
584 
585   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
586   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
587   xr += w;    yr += h;  xl = -w;     yl = -h;
588   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
589   ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
590   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
591   PetscFunctionReturn(0);
592 }
593 
594 #undef __FUNC__
595 #define __FUNC__ "MatView_SeqBAIJ"
596 int MatView_SeqBAIJ(Mat A,Viewer viewer)
597 {
598   int        ierr;
599   PetscTruth issocket,isascii,isbinary,isdraw;
600 
601   PetscFunctionBegin;
602   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
603   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
604   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
605   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
606   if (issocket) {
607     SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported");
608   } else if (isascii){
609     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
610   } else if (isbinary) {
611     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
612   } else if (isdraw) {
613     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
614   } else {
615     SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
616   }
617   PetscFunctionReturn(0);
618 }
619 
620 
621 #undef __FUNC__
622 #define __FUNC__ "MatGetValues_SeqBAIJ"
623 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
624 {
625   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
626   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
627   int        *ai = a->i, *ailen = a->ilen;
628   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
629   MatScalar  *ap, *aa = a->a, zero = 0.0;
630 
631   PetscFunctionBegin;
632   for ( k=0; k<m; k++ ) { /* loop over rows */
633     row  = im[k]; brow = row/bs;
634     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
635     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
636     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
637     nrow = ailen[brow];
638     for ( l=0; l<n; l++ ) { /* loop over columns */
639       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
640       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
641       col  = in[l] ;
642       bcol = col/bs;
643       cidx = col%bs;
644       ridx = row%bs;
645       high = nrow;
646       low  = 0; /* assume unsorted */
647       while (high-low > 5) {
648         t = (low+high)/2;
649         if (rp[t] > bcol) high = t;
650         else             low  = t;
651       }
652       for ( i=low; i<high; i++ ) {
653         if (rp[i] > bcol) break;
654         if (rp[i] == bcol) {
655           *v++ = ap[bs2*i+bs*cidx+ridx];
656           goto finished;
657         }
658       }
659       *v++ = zero;
660       finished:;
661     }
662   }
663   PetscFunctionReturn(0);
664 }
665 
666 
667 #undef __FUNC__
668 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
669 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
670 {
671   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
672   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
673   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented;
674   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
675   Scalar      *value = v;
676   MatScalar   *ap,*aa=a->a,*bap;
677 
678   PetscFunctionBegin;
679   if (roworiented) {
680     stepval = (n-1)*bs;
681   } else {
682     stepval = (m-1)*bs;
683   }
684   for ( k=0; k<m; k++ ) { /* loop over added rows */
685     row  = im[k];
686     if (row < 0) continue;
687 #if defined(PETSC_USE_BOPT_g)
688     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
689 #endif
690     rp   = aj + ai[row];
691     ap   = aa + bs2*ai[row];
692     rmax = imax[row];
693     nrow = ailen[row];
694     low  = 0;
695     for ( l=0; l<n; l++ ) { /* loop over added columns */
696       if (in[l] < 0) continue;
697 #if defined(PETSC_USE_BOPT_g)
698       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
699 #endif
700       col = in[l];
701       if (roworiented) {
702         value = v + k*(stepval+bs)*bs + l*bs;
703       } else {
704         value = v + l*(stepval+bs)*bs + k*bs;
705       }
706       if (!sorted) low = 0; high = nrow;
707       while (high-low > 7) {
708         t = (low+high)/2;
709         if (rp[t] > col) high = t;
710         else             low  = t;
711       }
712       for ( i=low; i<high; i++ ) {
713         if (rp[i] > col) break;
714         if (rp[i] == col) {
715           bap  = ap +  bs2*i;
716           if (roworiented) {
717             if (is == ADD_VALUES) {
718               for ( ii=0; ii<bs; ii++,value+=stepval ) {
719                 for (jj=ii; jj<bs2; jj+=bs ) {
720                   bap[jj] += *value++;
721                 }
722               }
723             } else {
724               for ( ii=0; ii<bs; ii++,value+=stepval ) {
725                 for (jj=ii; jj<bs2; jj+=bs ) {
726                   bap[jj] = *value++;
727                 }
728               }
729             }
730           } else {
731             if (is == ADD_VALUES) {
732               for ( ii=0; ii<bs; ii++,value+=stepval ) {
733                 for (jj=0; jj<bs; jj++ ) {
734                   *bap++ += *value++;
735                 }
736               }
737             } else {
738               for ( ii=0; ii<bs; ii++,value+=stepval ) {
739                 for (jj=0; jj<bs; jj++ ) {
740                   *bap++  = *value++;
741                 }
742               }
743             }
744           }
745           goto noinsert2;
746         }
747       }
748       if (nonew == 1) goto noinsert2;
749       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
750       if (nrow >= rmax) {
751         /* there is no extra room in row, therefore enlarge */
752         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
753         MatScalar *new_a;
754 
755         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
756 
757         /* malloc new storage space */
758         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
759         new_a   = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a);
760         new_j   = (int *) (new_a + bs2*new_nz);
761         new_i   = new_j + new_nz;
762 
763         /* copy over old data into new slots */
764         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
765         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
766         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
767         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
768         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
769         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
770         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
771         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
772         /* free up old matrix storage */
773         ierr = PetscFree(a->a);CHKERRQ(ierr);
774         if (!a->singlemalloc) {
775           ierr = PetscFree(a->i);CHKERRQ(ierr);
776           ierr = PetscFree(a->j);CHKERRQ(ierr);
777         }
778         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
779         a->singlemalloc = PETSC_TRUE;
780 
781         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
782         rmax = imax[row] = imax[row] + CHUNKSIZE;
783         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
784         a->maxnz += bs2*CHUNKSIZE;
785         a->reallocs++;
786         a->nz++;
787       }
788       N = nrow++ - 1;
789       /* shift up all the later entries in this row */
790       for ( ii=N; ii>=i; ii-- ) {
791         rp[ii+1] = rp[ii];
792         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
793       }
794       if (N >= i) {
795         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
796       }
797       rp[i] = col;
798       bap   = ap +  bs2*i;
799       if (roworiented) {
800         for ( ii=0; ii<bs; ii++,value+=stepval) {
801           for (jj=ii; jj<bs2; jj+=bs ) {
802             bap[jj] = *value++;
803           }
804         }
805       } else {
806         for ( ii=0; ii<bs; ii++,value+=stepval ) {
807           for (jj=0; jj<bs; jj++ ) {
808             *bap++  = *value++;
809           }
810         }
811       }
812       noinsert2:;
813       low = i;
814     }
815     ailen[row] = nrow;
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 
821 #undef __FUNC__
822 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
823 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
824 {
825   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
826   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
827   int        m = a->m,*ip, N, *ailen = a->ilen;
828   int        mbs = a->mbs, bs2 = a->bs2,rmax = 0,ierr;
829   MatScalar  *aa = a->a, *ap;
830 
831   PetscFunctionBegin;
832   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
833 
834   if (m) rmax = ailen[0];
835   for ( i=1; i<mbs; i++ ) {
836     /* move each row back by the amount of empty slots (fshift) before it*/
837     fshift += imax[i-1] - ailen[i-1];
838     rmax   = PetscMax(rmax,ailen[i]);
839     if (fshift) {
840       ip = aj + ai[i]; ap = aa + bs2*ai[i];
841       N = ailen[i];
842       for ( j=0; j<N; j++ ) {
843         ip[j-fshift] = ip[j];
844         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
845       }
846     }
847     ai[i] = ai[i-1] + ailen[i-1];
848   }
849   if (mbs) {
850     fshift += imax[mbs-1] - ailen[mbs-1];
851     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
852   }
853   /* reset ilen and imax for each row */
854   for ( i=0; i<mbs; i++ ) {
855     ailen[i] = imax[i] = ai[i+1] - ai[i];
856   }
857   a->nz = ai[mbs];
858 
859   /* diagonals may have moved, so kill the diagonal pointers */
860   if (fshift && a->diag) {
861     ierr = PetscFree(a->diag);CHKERRQ(ierr);
862     PLogObjectMemory(A,-(m+1)*sizeof(int));
863     a->diag = 0;
864   }
865   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
866            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
867   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
868            a->reallocs);
869   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
870   a->reallocs          = 0;
871   A->info.nz_unneeded  = (double)fshift*bs2;
872 
873   PetscFunctionReturn(0);
874 }
875 
876 
877 
878 /*
879    This function returns an array of flags which indicate the locations of contiguous
880    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
881    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
882    Assume: sizes should be long enough to hold all the values.
883 */
884 #undef __FUNC__
885 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks"
886 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
887 {
888   int        i,j,k,row;
889   PetscTruth flg;
890 
891   PetscFunctionBegin;
892   for ( i=0,j=0; i<n; j++ ) {
893     row = idx[i];
894     if (row%bs!=0) { /* Not the begining of a block */
895       sizes[j] = 1;
896       i++;
897     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
898       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
899       i++;
900     } else { /* Begining of the block, so check if the complete block exists */
901       flg = PETSC_TRUE;
902       for ( k=1; k<bs; k++ ) {
903         if (row+k != idx[i+k]) { /* break in the block */
904           flg = PETSC_FALSE;
905           break;
906         }
907       }
908       if (flg == PETSC_TRUE) { /* No break in the bs */
909         sizes[j] = bs;
910         i+= bs;
911       } else {
912         sizes[j] = 1;
913         i++;
914       }
915     }
916   }
917   *bs_max = j;
918   PetscFunctionReturn(0);
919 }
920 
921 #undef __FUNC__
922 #define __FUNC__ "MatZeroRows_SeqBAIJ"
923 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
924 {
925   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
926   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
927   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
928   Scalar      zero = 0.0;
929   MatScalar   *aa;
930 
931   PetscFunctionBegin;
932   /* Make a copy of the IS and  sort it */
933   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
934   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
935 
936   /* allocate memory for rows,sizes */
937   rows  = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows);
938   sizes = rows + is_n;
939 
940   /* initialize copy IS valurs to rows, and sort them */
941   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
942   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
943   ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
944   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
945 
946   for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) {
947     row   = rows[j];
948     if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row);
949     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
950     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
951     if (sizes[i] == bs) {
952       if (diag) {
953         if (baij->ilen[row/bs] > 0) {
954           baij->ilen[row/bs] = 1;
955           baij->j[baij->i[row/bs]] = row/bs;
956           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
957         }
958         /* Now insert all the diagoanl values for this bs */
959         for ( k=0; k<bs; k++ ) {
960           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
961         }
962       } else { /* (!diag) */
963         baij->ilen[row/bs] = 0;
964       } /* end (!diag) */
965     } else { /* (sizes[i] != bs) */
966 #if defined (PETSC_USE_DEBUG)
967       if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1");
968 #endif
969       for ( k=0; k<count; k++ ) {
970         aa[0] = zero;
971         aa+=bs;
972       }
973       if (diag) {
974         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
975       }
976     }
977   }
978 
979   ierr = PetscFree(rows);CHKERRQ(ierr);
980   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNC__
985 #define __FUNC__ "MatSetValues_SeqBAIJ"
986 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
987 {
988   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
989   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
990   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
991   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
992   int         ridx,cidx,bs2=a->bs2,ierr;
993   MatScalar   *ap,value,*aa=a->a,*bap;
994 
995   PetscFunctionBegin;
996   for ( k=0; k<m; k++ ) { /* loop over added rows */
997     row  = im[k]; brow = row/bs;
998     if (row < 0) continue;
999 #if defined(PETSC_USE_BOPT_g)
1000     if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m);
1001 #endif
1002     rp   = aj + ai[brow];
1003     ap   = aa + bs2*ai[brow];
1004     rmax = imax[brow];
1005     nrow = ailen[brow];
1006     low  = 0;
1007     for ( l=0; l<n; l++ ) { /* loop over added columns */
1008       if (in[l] < 0) continue;
1009 #if defined(PETSC_USE_BOPT_g)
1010       if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n);
1011 #endif
1012       col = in[l]; bcol = col/bs;
1013       ridx = row % bs; cidx = col % bs;
1014       if (roworiented) {
1015         value = v[l + k*n];
1016       } else {
1017         value = v[k + l*m];
1018       }
1019       if (!sorted) low = 0; high = nrow;
1020       while (high-low > 7) {
1021         t = (low+high)/2;
1022         if (rp[t] > bcol) high = t;
1023         else              low  = t;
1024       }
1025       for ( i=low; i<high; i++ ) {
1026         if (rp[i] > bcol) break;
1027         if (rp[i] == bcol) {
1028           bap  = ap +  bs2*i + bs*cidx + ridx;
1029           if (is == ADD_VALUES) *bap += value;
1030           else                  *bap  = value;
1031           goto noinsert1;
1032         }
1033       }
1034       if (nonew == 1) goto noinsert1;
1035       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1036       if (nrow >= rmax) {
1037         /* there is no extra room in row, therefore enlarge */
1038         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1039         MatScalar *new_a;
1040 
1041         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1042 
1043         /* Malloc new storage space */
1044         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1045         new_a   = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a);
1046         new_j   = (int *) (new_a + bs2*new_nz);
1047         new_i   = new_j + new_nz;
1048 
1049         /* copy over old data into new slots */
1050         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
1051         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1052         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1053         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1054         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1055         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1056         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1057         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1058         /* free up old matrix storage */
1059         ierr = PetscFree(a->a);CHKERRQ(ierr);
1060         if (!a->singlemalloc) {
1061           ierr = PetscFree(a->i);CHKERRQ(ierr);
1062           ierr = PetscFree(a->j);CHKERRQ(ierr);
1063         }
1064         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1065         a->singlemalloc = PETSC_TRUE;
1066 
1067         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1068         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1069         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1070         a->maxnz += bs2*CHUNKSIZE;
1071         a->reallocs++;
1072         a->nz++;
1073       }
1074       N = nrow++ - 1;
1075       /* shift up all the later entries in this row */
1076       for ( ii=N; ii>=i; ii-- ) {
1077         rp[ii+1] = rp[ii];
1078         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1079       }
1080       if (N>=i) {
1081         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1082       }
1083       rp[i]                      = bcol;
1084       ap[bs2*i + bs*cidx + ridx] = value;
1085       noinsert1:;
1086       low = i;
1087     }
1088     ailen[brow] = nrow;
1089   }
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1094 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1095 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1096 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1097 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1098 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec);
1099 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1100 extern int MatScale_SeqBAIJ(Scalar*,Mat);
1101 extern int MatNorm_SeqBAIJ(Mat,NormType,double *);
1102 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*);
1103 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1104 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1105 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1106 extern int MatZeroEntries_SeqBAIJ(Mat);
1107 
1108 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1109 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1110 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1111 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1112 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1113 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1114 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
1115 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1116 extern int MatSolveTrans_SeqBAIJ_7(Mat,Vec,Vec);
1117 extern int MatSolveTrans_SeqBAIJ_6(Mat,Vec,Vec);
1118 extern int MatSolveTrans_SeqBAIJ_5(Mat,Vec,Vec);
1119 extern int MatSolveTrans_SeqBAIJ_4(Mat,Vec,Vec);
1120 extern int MatSolveTrans_SeqBAIJ_3(Mat,Vec,Vec);
1121 extern int MatSolveTrans_SeqBAIJ_2(Mat,Vec,Vec);
1122 extern int MatSolveTrans_SeqBAIJ_1(Mat,Vec,Vec);
1123 
1124 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1125 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1126 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1127 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1128 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1129 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1130 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*);
1131 
1132 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1133 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1134 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1135 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1136 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1137 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec);
1138 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1139 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1140 
1141 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1142 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1143 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1144 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1145 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1146 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
1147 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1148 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1149 
1150 #undef __FUNC__
1151 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1152 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1153 {
1154   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1155   Mat         outA;
1156   int         ierr;
1157   PetscTruth  row_identity, col_identity;
1158 
1159   PetscFunctionBegin;
1160   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU");
1161   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1162   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1163   if (!row_identity || !col_identity) {
1164     SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU");
1165   }
1166 
1167   outA          = inA;
1168   inA->factor   = FACTOR_LU;
1169 
1170   if (!a->diag) {
1171     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1172   }
1173   /*
1174       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1175       for ILU(0) factorization with natural ordering
1176   */
1177   switch (a->bs) {
1178   case 1:
1179     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_2_NaturalOrdering;
1180     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1181   case 2:
1182     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1183     inA->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1184     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_2_NaturalOrdering;
1185     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1186     break;
1187   case 3:
1188     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1189     inA->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1190     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_3_NaturalOrdering;
1191     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1192     break;
1193   case 4:
1194     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1195     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1196     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_4_NaturalOrdering;
1197     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1198     break;
1199   case 5:
1200     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1201     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1202     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_5_NaturalOrdering;
1203     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1204     break;
1205   case 6:
1206     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1207     inA->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1208     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_6_NaturalOrdering;
1209     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1210     break;
1211   case 7:
1212     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1213     inA->ops->solvetrans      = MatSolveTrans_SeqBAIJ_7_NaturalOrdering;
1214     inA->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1215     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1216     break;
1217   default:
1218     a->row        = row;
1219     a->col        = col;
1220     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1221     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1222 
1223     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1224     ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr);
1225     PLogObjectParent(inA,a->icol);
1226 
1227     if (!a->solve_work) {
1228       a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1229       PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1230     }
1231   }
1232 
1233   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1234 
1235   PetscFunctionReturn(0);
1236 }
1237 #undef __FUNC__
1238 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1239 int MatPrintHelp_SeqBAIJ(Mat A)
1240 {
1241   static PetscTruth called = PETSC_FALSE;
1242   MPI_Comm          comm = A->comm;
1243   int               ierr;
1244 
1245   PetscFunctionBegin;
1246   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1247   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1248   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 EXTERN_C_BEGIN
1253 #undef __FUNC__
1254 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1255 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1256 {
1257   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1258   int         i,nz,n;
1259 
1260   PetscFunctionBegin;
1261   nz = baij->maxnz;
1262   n  = baij->n;
1263   for (i=0; i<nz; i++) {
1264     baij->j[i] = indices[i];
1265   }
1266   baij->nz = nz;
1267   for ( i=0; i<n; i++ ) {
1268     baij->ilen[i] = baij->imax[i];
1269   }
1270 
1271   PetscFunctionReturn(0);
1272 }
1273 EXTERN_C_END
1274 
1275 #undef __FUNC__
1276 #define __FUNC__ "MatSeqBAIJSetColumnIndices"
1277 /*@
1278     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1279        in the matrix.
1280 
1281   Input Parameters:
1282 +  mat - the SeqBAIJ matrix
1283 -  indices - the column indices
1284 
1285   Level: advanced
1286 
1287   Notes:
1288     This can be called if you have precomputed the nonzero structure of the
1289   matrix and want to provide it to the matrix object to improve the performance
1290   of the MatSetValues() operation.
1291 
1292     You MUST have set the correct numbers of nonzeros per row in the call to
1293   MatCreateSeqBAIJ().
1294 
1295     MUST be called before any calls to MatSetValues();
1296 
1297 @*/
1298 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1299 {
1300   int ierr,(*f)(Mat,int *);
1301 
1302   PetscFunctionBegin;
1303   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1304   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr);
1305   if (f) {
1306     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1307   } else {
1308     SETERRQ(1,1,"Wrong type of matrix to set column indices");
1309   }
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 /* -------------------------------------------------------------------*/
1314 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1315        MatGetRow_SeqBAIJ,
1316        MatRestoreRow_SeqBAIJ,
1317        MatMult_SeqBAIJ_N,
1318        MatMultAdd_SeqBAIJ_N,
1319        MatMultTrans_SeqBAIJ,
1320        MatMultTransAdd_SeqBAIJ,
1321        MatSolve_SeqBAIJ_N,
1322        0,
1323        0,
1324        0,
1325        MatLUFactor_SeqBAIJ,
1326        0,
1327        0,
1328        MatTranspose_SeqBAIJ,
1329        MatGetInfo_SeqBAIJ,
1330        MatEqual_SeqBAIJ,
1331        MatGetDiagonal_SeqBAIJ,
1332        MatDiagonalScale_SeqBAIJ,
1333        MatNorm_SeqBAIJ,
1334        0,
1335        MatAssemblyEnd_SeqBAIJ,
1336        0,
1337        MatSetOption_SeqBAIJ,
1338        MatZeroEntries_SeqBAIJ,
1339        MatZeroRows_SeqBAIJ,
1340        MatLUFactorSymbolic_SeqBAIJ,
1341        MatLUFactorNumeric_SeqBAIJ_N,
1342        0,
1343        0,
1344        MatGetSize_SeqBAIJ,
1345        MatGetSize_SeqBAIJ,
1346        MatGetOwnershipRange_SeqBAIJ,
1347        MatILUFactorSymbolic_SeqBAIJ,
1348        0,
1349        0,
1350        0,
1351        MatDuplicate_SeqBAIJ,
1352        0,
1353        0,
1354        MatILUFactor_SeqBAIJ,
1355        0,
1356        0,
1357        MatGetSubMatrices_SeqBAIJ,
1358        MatIncreaseOverlap_SeqBAIJ,
1359        MatGetValues_SeqBAIJ,
1360        0,
1361        MatPrintHelp_SeqBAIJ,
1362        MatScale_SeqBAIJ,
1363        0,
1364        0,
1365        0,
1366        MatGetBlockSize_SeqBAIJ,
1367        MatGetRowIJ_SeqBAIJ,
1368        MatRestoreRowIJ_SeqBAIJ,
1369        0,
1370        0,
1371        0,
1372        0,
1373        0,
1374        0,
1375        MatSetValuesBlocked_SeqBAIJ,
1376        MatGetSubMatrix_SeqBAIJ,
1377        0,
1378        0,
1379        MatGetMaps_Petsc};
1380 
1381 EXTERN_C_BEGIN
1382 #undef __FUNC__
1383 #define __FUNC__ "MatStoreValues_SeqBAIJ"
1384 int MatStoreValues_SeqBAIJ(Mat mat)
1385 {
1386   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1387   int         nz = aij->i[aij->m]*aij->bs*aij->bs2;
1388   int         ierr;
1389 
1390   PetscFunctionBegin;
1391   if (aij->nonew != 1) {
1392     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1393   }
1394 
1395   /* allocate space for values if not already there */
1396   if (!aij->saved_values) {
1397     aij->saved_values = (Scalar *) PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values);
1398   }
1399 
1400   /* copy values over */
1401   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1402   PetscFunctionReturn(0);
1403 }
1404 EXTERN_C_END
1405 
1406 EXTERN_C_BEGIN
1407 #undef __FUNC__
1408 #define __FUNC__ "MatRetrieveValues_SeqBAIJ"
1409 int MatRetrieveValues_SeqBAIJ(Mat mat)
1410 {
1411   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1412   int         nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr;
1413 
1414   PetscFunctionBegin;
1415   if (aij->nonew != 1) {
1416     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1417   }
1418   if (!aij->saved_values) {
1419     SETERRQ(1,1,"Must call MatStoreValues(A);first");
1420   }
1421 
1422   /* copy values over */
1423   ierr = PetscMemcpy(aij->a, aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1424   PetscFunctionReturn(0);
1425 }
1426 EXTERN_C_END
1427 
1428 #undef __FUNC__
1429 #define __FUNC__ "MatCreateSeqBAIJ"
1430 /*@C
1431    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1432    compressed row) format.  For good matrix assembly performance the
1433    user should preallocate the matrix storage by setting the parameter nz
1434    (or the array nnz).  By setting these parameters accurately, performance
1435    during matrix assembly can be increased by more than a factor of 50.
1436 
1437    Collective on MPI_Comm
1438 
1439    Input Parameters:
1440 +  comm - MPI communicator, set to PETSC_COMM_SELF
1441 .  bs - size of block
1442 .  m - number of rows
1443 .  n - number of columns
1444 .  nz - number of block nonzeros per block row (same for all rows)
1445 -  nnz - array containing the number of block nonzeros in the various block rows
1446          (possibly different for each block row) or PETSC_NULL
1447 
1448    Output Parameter:
1449 .  A - the matrix
1450 
1451    Options Database Keys:
1452 .   -mat_no_unroll - uses code that does not unroll the loops in the
1453                      block calculations (much slower)
1454 .    -mat_block_size - size of the blocks to use
1455 
1456    Level: intermediate
1457 
1458    Notes:
1459    The block AIJ format is fully compatible with standard Fortran 77
1460    storage.  That is, the stored row and column indices can begin at
1461    either one (as in Fortran) or zero.  See the users' manual for details.
1462 
1463    Specify the preallocated storage with either nz or nnz (not both).
1464    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1465    allocation.  For additional details, see the users manual chapter on
1466    matrices.
1467 
1468 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1469 @*/
1470 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1471 {
1472   Mat         B;
1473   Mat_SeqBAIJ *b;
1474   int         i,len,ierr,mbs,nbs,bs2,size;
1475   PetscTruth  flg;
1476 
1477   PetscFunctionBegin;
1478   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1479   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1");
1480 
1481   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1482   mbs  = m/bs;
1483   nbs  = n/bs;
1484   bs2  = bs*bs;
1485 
1486   if (mbs*bs!=m || nbs*bs!=n) {
1487     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize");
1488   }
1489 
1490   if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz);
1491   if (nnz) {
1492     for (i=0; i<mbs; i++) {
1493       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1494     }
1495   }
1496 
1497   *A = 0;
1498   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView);
1499   PLogObjectCreate(B);
1500   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b);
1501   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1502   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1503   ierr    = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1504   if (!flg) {
1505     switch (bs) {
1506     case 1:
1507       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1508       B->ops->solve           = MatSolve_SeqBAIJ_1;
1509       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_1;
1510       B->ops->mult            = MatMult_SeqBAIJ_1;
1511       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1512       break;
1513     case 2:
1514       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1515       B->ops->solve           = MatSolve_SeqBAIJ_2;
1516       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_2;
1517       B->ops->mult            = MatMult_SeqBAIJ_2;
1518       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1519       break;
1520     case 3:
1521       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1522       B->ops->solve           = MatSolve_SeqBAIJ_3;
1523       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_3;
1524       B->ops->mult            = MatMult_SeqBAIJ_3;
1525       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1526       break;
1527     case 4:
1528       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1529       B->ops->solve           = MatSolve_SeqBAIJ_4;
1530       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_4;
1531       B->ops->mult            = MatMult_SeqBAIJ_4;
1532       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1533       break;
1534     case 5:
1535       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1536       B->ops->solve           = MatSolve_SeqBAIJ_5;
1537       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_5;
1538       B->ops->mult            = MatMult_SeqBAIJ_5;
1539       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1540       break;
1541     case 6:
1542       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1543       B->ops->solve           = MatSolve_SeqBAIJ_6;
1544       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_6;
1545       B->ops->mult            = MatMult_SeqBAIJ_6;
1546       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1547       break;
1548     case 7:
1549       B->ops->mult            = MatMult_SeqBAIJ_7;
1550       B->ops->solve           = MatSolve_SeqBAIJ_7;
1551       B->ops->solvetrans      = MatSolveTrans_SeqBAIJ_7;
1552       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1553       break;
1554     }
1555   }
1556   B->ops->destroy     = MatDestroy_SeqBAIJ;
1557   B->ops->view        = MatView_SeqBAIJ;
1558   B->factor           = 0;
1559   B->lupivotthreshold = 1.0;
1560   B->mapping          = 0;
1561   b->row              = 0;
1562   b->col              = 0;
1563   b->icol             = 0;
1564   b->reallocs         = 0;
1565   b->saved_values     = 0;
1566 
1567   b->m       = m; B->m = m; B->M = m;
1568   b->n       = n; B->n = n; B->N = n;
1569 
1570   ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr);
1571   ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr);
1572 
1573   b->mbs     = mbs;
1574   b->nbs     = nbs;
1575   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(b->imax);
1576   if (!nnz) {
1577     if (nz == PETSC_DEFAULT) nz = 5;
1578     else if (nz <= 0)        nz = 1;
1579     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1580     nz = nz*mbs;
1581   } else {
1582     nz = 0;
1583     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1584   }
1585 
1586   /* allocate the matrix space */
1587   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int);
1588   b->a  = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a);
1589   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1590   b->j  = (int *) (b->a + nz*bs2);
1591   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1592   b->i  = b->j + nz;
1593   b->singlemalloc = PETSC_TRUE;
1594 
1595   b->i[0] = 0;
1596   for (i=1; i<mbs+1; i++) {
1597     b->i[i] = b->i[i-1] + b->imax[i-1];
1598   }
1599 
1600   /* b->ilen will count nonzeros in each block row so far. */
1601   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1602   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1603   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1604 
1605   b->bs               = bs;
1606   b->bs2              = bs2;
1607   b->mbs              = mbs;
1608   b->nz               = 0;
1609   b->maxnz            = nz*bs2;
1610   b->sorted           = PETSC_FALSE;
1611   b->roworiented      = PETSC_TRUE;
1612   b->nonew            = 0;
1613   b->diag             = 0;
1614   b->solve_work       = 0;
1615   b->mult_work        = 0;
1616   b->spptr            = 0;
1617   B->info.nz_unneeded = (double)b->maxnz;
1618 
1619   *A = B;
1620   ierr = OptionsHasName(PETSC_NULL,"-help", &flg);CHKERRQ(ierr);
1621   if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); }
1622 
1623   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1624                                      "MatStoreValues_SeqBAIJ",
1625                                      (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1626   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1627                                      "MatRetrieveValues_SeqBAIJ",
1628                                      (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1629   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1630                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1631                                      (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1632   PetscFunctionReturn(0);
1633 }
1634 
1635 #undef __FUNC__
1636 #define __FUNC__ "MatDuplicate_SeqBAIJ"
1637 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1638 {
1639   Mat         C;
1640   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1641   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1642 
1643   PetscFunctionBegin;
1644   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix");
1645 
1646   *B = 0;
1647   PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView);
1648   PLogObjectCreate(C);
1649   C->data         = (void *) (c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c);
1650   ierr            = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1651   C->ops->destroy = MatDestroy_SeqBAIJ;
1652   C->ops->view    = MatView_SeqBAIJ;
1653   C->factor       = A->factor;
1654   c->row          = 0;
1655   c->col          = 0;
1656   c->icol         = 0;
1657   c->saved_values = 0;
1658   C->assembled    = PETSC_TRUE;
1659 
1660   c->m = C->m   = a->m;
1661   c->n = C->n   = a->n;
1662   C->M          = a->m;
1663   C->N          = a->n;
1664 
1665   c->bs         = a->bs;
1666   c->bs2        = a->bs2;
1667   c->mbs        = a->mbs;
1668   c->nbs        = a->nbs;
1669 
1670   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax);
1671   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen);
1672   for ( i=0; i<mbs; i++ ) {
1673     c->imax[i] = a->imax[i];
1674     c->ilen[i] = a->ilen[i];
1675   }
1676 
1677   /* allocate the matrix space */
1678   c->singlemalloc = PETSC_TRUE;
1679   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1680   c->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(c->a);
1681   c->j = (int *) (c->a + nz*bs2);
1682   c->i = c->j + nz;
1683   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1684   if (mbs > 0) {
1685     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1686     if (cpvalues == MAT_COPY_VALUES) {
1687       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1688     } else {
1689       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1690     }
1691   }
1692 
1693   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1694   c->sorted      = a->sorted;
1695   c->roworiented = a->roworiented;
1696   c->nonew       = a->nonew;
1697 
1698   if (a->diag) {
1699     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(c->diag);
1700     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1701     for ( i=0; i<mbs; i++ ) {
1702       c->diag[i] = a->diag[i];
1703     }
1704   } else c->diag        = 0;
1705   c->nz                 = a->nz;
1706   c->maxnz              = a->maxnz;
1707   c->solve_work         = 0;
1708   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1709   c->mult_work          = 0;
1710   *B = C;
1711   ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1712   PetscFunctionReturn(0);
1713 }
1714 
1715 #undef __FUNC__
1716 #define __FUNC__ "MatLoad_SeqBAIJ"
1717 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1718 {
1719   Mat_SeqBAIJ  *a;
1720   Mat          B;
1721   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1722   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1723   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1724   int          *masked, nmask,tmp,bs2,ishift;
1725   Scalar       *aa;
1726   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1727 
1728   PetscFunctionBegin;
1729   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1730   bs2  = bs*bs;
1731 
1732   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1733   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor");
1734   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1735   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1736   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object");
1737   M = header[1]; N = header[2]; nz = header[3];
1738 
1739   if (header[3] < 0) {
1740     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ");
1741   }
1742 
1743   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
1744 
1745   /*
1746      This code adds extra rows to make sure the number of rows is
1747     divisible by the blocksize
1748   */
1749   mbs        = M/bs;
1750   extra_rows = bs - M + bs*(mbs);
1751   if (extra_rows == bs) extra_rows = 0;
1752   else                  mbs++;
1753   if (extra_rows) {
1754     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1755   }
1756 
1757   /* read in row lengths */
1758   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1759   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1760   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1761 
1762   /* read in column indices */
1763   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) );CHKPTRQ(jj);
1764   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1765   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1766 
1767   /* loop over row lengths determining block row lengths */
1768   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1769   ierr        = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1770   mask        = (int *) PetscMalloc( 2*mbs*sizeof(int) );CHKPTRQ(mask);
1771   ierr        = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1772   masked      = mask + mbs;
1773   rowcount    = 0; nzcount = 0;
1774   for ( i=0; i<mbs; i++ ) {
1775     nmask = 0;
1776     for ( j=0; j<bs; j++ ) {
1777       kmax = rowlengths[rowcount];
1778       for ( k=0; k<kmax; k++ ) {
1779         tmp = jj[nzcount++]/bs;
1780         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1781       }
1782       rowcount++;
1783     }
1784     browlengths[i] += nmask;
1785     /* zero out the mask elements we set */
1786     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1787   }
1788 
1789   /* create our matrix */
1790   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1791   B = *A;
1792   a = (Mat_SeqBAIJ *) B->data;
1793 
1794   /* set matrix "i" values */
1795   a->i[0] = 0;
1796   for ( i=1; i<= mbs; i++ ) {
1797     a->i[i]      = a->i[i-1] + browlengths[i-1];
1798     a->ilen[i-1] = browlengths[i-1];
1799   }
1800   a->nz         = 0;
1801   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1802 
1803   /* read in nonzero values */
1804   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1805   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1806   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1807 
1808   /* set "a" and "j" values into matrix */
1809   nzcount = 0; jcount = 0;
1810   for ( i=0; i<mbs; i++ ) {
1811     nzcountb = nzcount;
1812     nmask    = 0;
1813     for ( j=0; j<bs; j++ ) {
1814       kmax = rowlengths[i*bs+j];
1815       for ( k=0; k<kmax; k++ ) {
1816         tmp = jj[nzcount++]/bs;
1817 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1818       }
1819       rowcount++;
1820     }
1821     /* sort the masked values */
1822     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1823 
1824     /* set "j" values into matrix */
1825     maskcount = 1;
1826     for ( j=0; j<nmask; j++ ) {
1827       a->j[jcount++]  = masked[j];
1828       mask[masked[j]] = maskcount++;
1829     }
1830     /* set "a" values into matrix */
1831     ishift = bs2*a->i[i];
1832     for ( j=0; j<bs; j++ ) {
1833       kmax = rowlengths[i*bs+j];
1834       for ( k=0; k<kmax; k++ ) {
1835         tmp       = jj[nzcountb]/bs ;
1836         block     = mask[tmp] - 1;
1837         point     = jj[nzcountb] - bs*tmp;
1838         idx       = ishift + bs2*block + j + bs*point;
1839         a->a[idx] = aa[nzcountb++];
1840       }
1841     }
1842     /* zero out the mask elements we set */
1843     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1844   }
1845   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix");
1846 
1847   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1848   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1849   ierr = PetscFree(aa);CHKERRQ(ierr);
1850   ierr = PetscFree(jj);CHKERRQ(ierr);
1851   ierr = PetscFree(mask);CHKERRQ(ierr);
1852 
1853   B->assembled = PETSC_TRUE;
1854 
1855   ierr = MatView_Private(B);CHKERRQ(ierr);
1856   PetscFunctionReturn(0);
1857 }
1858 
1859 
1860 
1861