xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 36db0b34874d8612cc4739cd173ad5a2eae325a7)
1 /*$Id: baij.c,v 1.194 1999/11/24 21:53:59 bsmith Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "sys.h"
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNC__
18 #define __FUNC__ "MatMissingDiagonal_SeqBAIJ"
19 int MatMissingDiagonal_SeqBAIJ(Mat A)
20 {
21   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) {
29       SETERRQ1(1,1,"Matrix is missing diagonal number %d",i);
30     }
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 #undef __FUNC__
36 #define __FUNC__ "MatMarkDiagonal_SeqBAIJ"
37 int MatMarkDiagonal_SeqBAIJ(Mat A)
38 {
39   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
40   int         i,j,*diag,m = a->mbs;
41 
42   PetscFunctionBegin;
43   if (a->diag) PetscFunctionReturn(0);
44 
45   diag = (int*)PetscMalloc((m+1)*sizeof(int));CHKPTRQ(diag);
46   PLogObjectMemory(A,(m+1)*sizeof(int));
47   for (i=0; i<m; i++) {
48     diag[i] = a->i[i+1];
49     for (j=a->i[i]; j<a->i[i+1]; j++) {
50       if (a->j[j] == i) {
51         diag[i] = j;
52         break;
53       }
54     }
55   }
56   a->diag = diag;
57   PetscFunctionReturn(0);
58 }
59 
60 
61 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
62 
63 #undef __FUNC__
64 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
65 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
66 {
67   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
68   int         ierr,n = a->mbs,i;
69 
70   PetscFunctionBegin;
71   *nn = n;
72   if (!ia) PetscFunctionReturn(0);
73   if (symmetric) {
74     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
75   } else if (oshift == 1) {
76     /* temporarily add 1 to i and j indices */
77     int nz = a->i[n] + 1;
78     for (i=0; i<nz; i++) a->j[i]++;
79     for (i=0; i<n+1; i++) a->i[i]++;
80     *ia = a->i; *ja = a->j;
81   } else {
82     *ia = a->i; *ja = a->j;
83   }
84 
85   PetscFunctionReturn(0);
86 }
87 
88 #undef __FUNC__
89 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
90 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
91                                 PetscTruth *done)
92 {
93   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
94   int         i,n = a->mbs,ierr;
95 
96   PetscFunctionBegin;
97   if (!ia) PetscFunctionReturn(0);
98   if (symmetric) {
99     ierr = PetscFree(*ia);CHKERRQ(ierr);
100     ierr = PetscFree(*ja);CHKERRQ(ierr);
101   } else if (oshift == 1) {
102     int nz = a->i[n];
103     for (i=0; i<nz; i++) a->j[i]--;
104     for (i=0; i<n+1; i++) a->i[i]--;
105   }
106   PetscFunctionReturn(0);
107 }
108 
109 #undef __FUNC__
110 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
111 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
112 {
113   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
114 
115   PetscFunctionBegin;
116   *bs = baij->bs;
117   PetscFunctionReturn(0);
118 }
119 
120 
121 #undef __FUNC__
122 #define __FUNC__ "MatDestroy_SeqBAIJ"
123 int MatDestroy_SeqBAIJ(Mat A)
124 {
125   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
126   int         ierr;
127 
128   PetscFunctionBegin;
129   if (A->mapping) {
130     ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr);
131   }
132   if (A->bmapping) {
133     ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr);
134   }
135   if (A->rmap) {
136     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
137   }
138   if (A->cmap) {
139     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
140   }
141 #if defined(PETSC_USE_LOG)
142   PLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
143 #endif
144   ierr = PetscFree(a->a);CHKERRQ(ierr);
145   if (!a->singlemalloc) {
146     ierr = PetscFree(a->i);CHKERRQ(ierr);
147     ierr = PetscFree(a->j);CHKERRQ(ierr);
148   }
149   if (a->row) {
150     ierr = ISDestroy(a->row);CHKERRQ(ierr);
151   }
152   if (a->col) {
153     ierr = ISDestroy(a->col);CHKERRQ(ierr);
154   }
155   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
156   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
157   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
158   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
159   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
160   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
161   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
162   ierr = PetscFree(a);CHKERRQ(ierr);
163   PLogObjectDestroy(A);
164   PetscHeaderDestroy(A);
165   PetscFunctionReturn(0);
166 }
167 
168 #undef __FUNC__
169 #define __FUNC__ "MatSetOption_SeqBAIJ"
170 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
171 {
172   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
173 
174   PetscFunctionBegin;
175   if      (op == MAT_ROW_ORIENTED)                 a->roworiented    = PETSC_TRUE;
176   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented    = PETSC_FALSE;
177   else if (op == MAT_COLUMNS_SORTED)               a->sorted         = PETSC_TRUE;
178   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted         = PETSC_FALSE;
179   else if (op == MAT_KEEP_ZEROED_ROWS)             a->keepzeroedrows = PETSC_TRUE;
180   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew          = 1;
181   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew          = -1;
182   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew          = -2;
183   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew          = 0;
184   else if (op == MAT_ROWS_SORTED ||
185            op == MAT_ROWS_UNSORTED ||
186            op == MAT_SYMMETRIC ||
187            op == MAT_STRUCTURALLY_SYMMETRIC ||
188            op == MAT_YES_NEW_DIAGONALS ||
189            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
190            op == MAT_USE_HASH_TABLE) {
191     PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
192   } else if (op == MAT_NO_NEW_DIAGONALS) {
193     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
194   } else {
195     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
196   }
197   PetscFunctionReturn(0);
198 }
199 
200 
201 #undef __FUNC__
202 #define __FUNC__ "MatGetSize_SeqBAIJ"
203 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
204 {
205   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
206 
207   PetscFunctionBegin;
208   if (m) *m = a->m;
209   if (n) *n = a->n;
210   PetscFunctionReturn(0);
211 }
212 
213 #undef __FUNC__
214 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
215 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
216 {
217   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
218 
219   PetscFunctionBegin;
220   *m = 0; *n = a->m;
221   PetscFunctionReturn(0);
222 }
223 
224 #undef __FUNC__
225 #define __FUNC__ "MatGetRow_SeqBAIJ"
226 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
227 {
228   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
229   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
230   MatScalar    *aa,*aa_i;
231   Scalar       *v_i;
232 
233   PetscFunctionBegin;
234   bs  = a->bs;
235   ai  = a->i;
236   aj  = a->j;
237   aa  = a->a;
238   bs2 = a->bs2;
239 
240   if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range");
241 
242   bn  = row/bs;   /* Block number */
243   bp  = row % bs; /* Block Position */
244   M   = ai[bn+1] - ai[bn];
245   *nz = bs*M;
246 
247   if (v) {
248     *v = 0;
249     if (*nz) {
250       *v = (Scalar*)PetscMalloc((*nz)*sizeof(Scalar));CHKPTRQ(*v);
251       for (i=0; i<M; i++) { /* for each block in the block row */
252         v_i  = *v + i*bs;
253         aa_i = aa + bs2*(ai[bn] + i);
254         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
255       }
256     }
257   }
258 
259   if (idx) {
260     *idx = 0;
261     if (*nz) {
262       *idx = (int*)PetscMalloc((*nz)*sizeof(int));CHKPTRQ(*idx);
263       for (i=0; i<M; i++) { /* for each block in the block row */
264         idx_i = *idx + i*bs;
265         itmp  = bs*aj[ai[bn] + i];
266         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
267       }
268     }
269   }
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNC__
274 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
275 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
276 {
277   int ierr;
278 
279   PetscFunctionBegin;
280   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
281   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
282   PetscFunctionReturn(0);
283 }
284 
285 #undef __FUNC__
286 #define __FUNC__ "MatTranspose_SeqBAIJ"
287 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
288 {
289   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
290   Mat         C;
291   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
292   int         *rows,*cols,bs2=a->bs2,refct;
293   MatScalar   *array = a->a;
294 
295   PetscFunctionBegin;
296   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place");
297   col  = (int*)PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col);
298   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
299 
300   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
301   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
302   ierr = PetscFree(col);CHKERRQ(ierr);
303   rows = (int*)PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows);
304   cols = rows + bs;
305   for (i=0; i<mbs; i++) {
306     cols[0] = i*bs;
307     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
308     len = ai[i+1] - ai[i];
309     for (j=0; j<len; j++) {
310       rows[0] = (*aj++)*bs;
311       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
312       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
313       array += bs2;
314     }
315   }
316   ierr = PetscFree(rows);CHKERRQ(ierr);
317 
318   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
319   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320 
321   if (B) {
322     *B = C;
323   } else {
324     PetscOps *Abops;
325     MatOps   Aops;
326 
327     /* This isn't really an in-place transpose */
328     ierr = PetscFree(a->a);CHKERRQ(ierr);
329     if (!a->singlemalloc) {
330       ierr = PetscFree(a->i);CHKERRQ(ierr);
331       ierr = PetscFree(a->j);CHKERRQ(ierr);
332     }
333     if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
334     if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
335     if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
336     if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
337     ierr = PetscFree(a);CHKERRQ(ierr);
338 
339 
340     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
341     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
342 
343     /*
344        This is horrible, horrible code. We need to keep the
345       A pointers for the bops and ops but copy everything
346       else from C.
347     */
348     Abops    = A->bops;
349     Aops     = A->ops;
350     refct    = A->refct;
351     ierr     = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr);
352     A->bops  = Abops;
353     A->ops   = Aops;
354     A->qlist = 0;
355     A->refct = refct;
356 
357     PetscHeaderDestroy(C);
358   }
359   PetscFunctionReturn(0);
360 }
361 
362 #undef __FUNC__
363 #define __FUNC__ "MatView_SeqBAIJ_Binary"
364 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
365 {
366   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
367   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
368   Scalar      *aa;
369   FILE        *file;
370 
371   PetscFunctionBegin;
372   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
373   col_lens = (int*)PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
374   col_lens[0] = MAT_COOKIE;
375 
376   col_lens[1] = a->m;
377   col_lens[2] = a->n;
378   col_lens[3] = a->nz*bs2;
379 
380   /* store lengths of each row and write (including header) to file */
381   count = 0;
382   for (i=0; i<a->mbs; i++) {
383     for (j=0; j<bs; j++) {
384       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
385     }
386   }
387   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr);
388   ierr = PetscFree(col_lens);CHKERRQ(ierr);
389 
390   /* store column indices (zero start index) */
391   jj = (int*)PetscMalloc((a->nz+1)*bs2*sizeof(int));CHKPTRQ(jj);
392   count = 0;
393   for (i=0; i<a->mbs; i++) {
394     for (j=0; j<bs; j++) {
395       for (k=a->i[i]; k<a->i[i+1]; k++) {
396         for (l=0; l<bs; l++) {
397           jj[count++] = bs*a->j[k] + l;
398         }
399       }
400     }
401   }
402   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
403   ierr = PetscFree(jj);CHKERRQ(ierr);
404 
405   /* store nonzero values */
406   aa = (Scalar*)PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa);
407   count = 0;
408   for (i=0; i<a->mbs; i++) {
409     for (j=0; j<bs; j++) {
410       for (k=a->i[i]; k<a->i[i+1]; k++) {
411         for (l=0; l<bs; l++) {
412           aa[count++] = a->a[bs2*k + l*bs + j];
413         }
414       }
415     }
416   }
417   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
418   ierr = PetscFree(aa);CHKERRQ(ierr);
419 
420   ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
421   if (file) {
422     fprintf(file,"-matload_block_size %d\n",a->bs);
423   }
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNC__
428 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
429 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
430 {
431   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
432   int         ierr,i,j,format,bs = a->bs,k,l,bs2=a->bs2;
433   char        *outputname;
434 
435   PetscFunctionBegin;
436   ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr);
437   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
438   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
439     ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
440   } else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
441     SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported");
442   } else if (format == VIEWER_FORMAT_ASCII_COMMON) {
443     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
444     for (i=0; i<a->mbs; i++) {
445       for (j=0; j<bs; j++) {
446         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
447         for (k=a->i[i]; k<a->i[i+1]; k++) {
448           for (l=0; l<bs; l++) {
449 #if defined(PETSC_USE_COMPLEX)
450             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
451               ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
452                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
453             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
454               ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
455                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
456             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
457               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
458             }
459 #else
460             if (a->a[bs2*k + l*bs + j] != 0.0) {
461               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
462             }
463 #endif
464           }
465         }
466         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
467       }
468     }
469     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
470   } else {
471     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
472     for (i=0; i<a->mbs; i++) {
473       for (j=0; j<bs; j++) {
474         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
475         for (k=a->i[i]; k<a->i[i+1]; k++) {
476           for (l=0; l<bs; l++) {
477 #if defined(PETSC_USE_COMPLEX)
478             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
479               ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
480                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
481             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
482               ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
483                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
484             } else {
485               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
486             }
487 #else
488             ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
489 #endif
490           }
491         }
492         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
493       }
494     }
495     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
496   }
497   ierr = ViewerFlush(viewer);CHKERRQ(ierr);
498   PetscFunctionReturn(0);
499 }
500 
501 #undef __FUNC__
502 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom"
503 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa)
504 {
505   Mat          A = (Mat) Aa;
506   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
507   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
508   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
509   MatScalar    *aa;
510   MPI_Comm     comm;
511   Viewer       viewer;
512 
513   PetscFunctionBegin;
514   /*
515       This is nasty. If this is called from an originally parallel matrix
516    then all processes call this,but only the first has the matrix so the
517    rest should return immediately.
518   */
519   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
520   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
521   if (rank) PetscFunctionReturn(0);
522 
523   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
524 
525   ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
526 
527   /* loop over matrix elements drawing boxes */
528   color = DRAW_BLUE;
529   for (i=0,row=0; i<mbs; i++,row+=bs) {
530     for (j=a->i[i]; j<a->i[i+1]; j++) {
531       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
532       x_l = a->j[j]*bs; x_r = x_l + 1.0;
533       aa = a->a + j*bs2;
534       for (k=0; k<bs; k++) {
535         for (l=0; l<bs; l++) {
536           if (PetscRealPart(*aa++) >=  0.) continue;
537           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
538         }
539       }
540     }
541   }
542   color = DRAW_CYAN;
543   for (i=0,row=0; i<mbs; i++,row+=bs) {
544     for (j=a->i[i]; j<a->i[i+1]; j++) {
545       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
546       x_l = a->j[j]*bs; x_r = x_l + 1.0;
547       aa = a->a + j*bs2;
548       for (k=0; k<bs; k++) {
549         for (l=0; l<bs; l++) {
550           if (PetscRealPart(*aa++) != 0.) continue;
551           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
552         }
553       }
554     }
555   }
556 
557   color = DRAW_RED;
558   for (i=0,row=0; i<mbs; i++,row+=bs) {
559     for (j=a->i[i]; j<a->i[i+1]; j++) {
560       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
561       x_l = a->j[j]*bs; x_r = x_l + 1.0;
562       aa = a->a + j*bs2;
563       for (k=0; k<bs; k++) {
564         for (l=0; l<bs; l++) {
565           if (PetscRealPart(*aa++) <= 0.) continue;
566           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
567         }
568       }
569     }
570   }
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNC__
575 #define __FUNC__ "MatView_SeqBAIJ_Draw"
576 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
577 {
578   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
579   int          ierr;
580   PetscReal    xl,yl,xr,yr,w,h;
581   Draw         draw;
582   PetscTruth   isnull;
583 
584   PetscFunctionBegin;
585 
586   ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
587   ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
588 
589   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
590   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
591   xr += w;    yr += h;  xl = -w;     yl = -h;
592   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
593   ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
594   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
595   PetscFunctionReturn(0);
596 }
597 
598 #undef __FUNC__
599 #define __FUNC__ "MatView_SeqBAIJ"
600 int MatView_SeqBAIJ(Mat A,Viewer viewer)
601 {
602   int        ierr;
603   PetscTruth issocket,isascii,isbinary,isdraw;
604 
605   PetscFunctionBegin;
606   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
607   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
608   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
609   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
610   if (issocket) {
611     SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported");
612   } else if (isascii){
613     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
614   } else if (isbinary) {
615     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
616   } else if (isdraw) {
617     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
618   } else {
619     SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
620   }
621   PetscFunctionReturn(0);
622 }
623 
624 
625 #undef __FUNC__
626 #define __FUNC__ "MatGetValues_SeqBAIJ"
627 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
628 {
629   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
630   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
631   int        *ai = a->i,*ailen = a->ilen;
632   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
633   MatScalar  *ap,*aa = a->a,zero = 0.0;
634 
635   PetscFunctionBegin;
636   for (k=0; k<m; k++) { /* loop over rows */
637     row  = im[k]; brow = row/bs;
638     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
639     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
640     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
641     nrow = ailen[brow];
642     for (l=0; l<n; l++) { /* loop over columns */
643       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
644       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
645       col  = in[l] ;
646       bcol = col/bs;
647       cidx = col%bs;
648       ridx = row%bs;
649       high = nrow;
650       low  = 0; /* assume unsorted */
651       while (high-low > 5) {
652         t = (low+high)/2;
653         if (rp[t] > bcol) high = t;
654         else             low  = t;
655       }
656       for (i=low; i<high; i++) {
657         if (rp[i] > bcol) break;
658         if (rp[i] == bcol) {
659           *v++ = ap[bs2*i+bs*cidx+ridx];
660           goto finished;
661         }
662       }
663       *v++ = zero;
664       finished:;
665     }
666   }
667   PetscFunctionReturn(0);
668 }
669 
670 
671 #undef __FUNC__
672 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
673 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
674 {
675   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
676   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
677   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
678   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
679   Scalar      *value = v;
680   MatScalar   *ap,*aa=a->a,*bap;
681 
682   PetscFunctionBegin;
683   if (roworiented) {
684     stepval = (n-1)*bs;
685   } else {
686     stepval = (m-1)*bs;
687   }
688   for (k=0; k<m; k++) { /* loop over added rows */
689     row  = im[k];
690     if (row < 0) continue;
691 #if defined(PETSC_USE_BOPT_g)
692     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
693 #endif
694     rp   = aj + ai[row];
695     ap   = aa + bs2*ai[row];
696     rmax = imax[row];
697     nrow = ailen[row];
698     low  = 0;
699     for (l=0; l<n; l++) { /* loop over added columns */
700       if (in[l] < 0) continue;
701 #if defined(PETSC_USE_BOPT_g)
702       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
703 #endif
704       col = in[l];
705       if (roworiented) {
706         value = v + k*(stepval+bs)*bs + l*bs;
707       } else {
708         value = v + l*(stepval+bs)*bs + k*bs;
709       }
710       if (!sorted) low = 0; high = nrow;
711       while (high-low > 7) {
712         t = (low+high)/2;
713         if (rp[t] > col) high = t;
714         else             low  = t;
715       }
716       for (i=low; i<high; i++) {
717         if (rp[i] > col) break;
718         if (rp[i] == col) {
719           bap  = ap +  bs2*i;
720           if (roworiented) {
721             if (is == ADD_VALUES) {
722               for (ii=0; ii<bs; ii++,value+=stepval) {
723                 for (jj=ii; jj<bs2; jj+=bs) {
724                   bap[jj] += *value++;
725                 }
726               }
727             } else {
728               for (ii=0; ii<bs; ii++,value+=stepval) {
729                 for (jj=ii; jj<bs2; jj+=bs) {
730                   bap[jj] = *value++;
731                 }
732               }
733             }
734           } else {
735             if (is == ADD_VALUES) {
736               for (ii=0; ii<bs; ii++,value+=stepval) {
737                 for (jj=0; jj<bs; jj++) {
738                   *bap++ += *value++;
739                 }
740               }
741             } else {
742               for (ii=0; ii<bs; ii++,value+=stepval) {
743                 for (jj=0; jj<bs; jj++) {
744                   *bap++  = *value++;
745                 }
746               }
747             }
748           }
749           goto noinsert2;
750         }
751       }
752       if (nonew == 1) goto noinsert2;
753       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
754       if (nrow >= rmax) {
755         /* there is no extra room in row, therefore enlarge */
756         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
757         MatScalar *new_a;
758 
759         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
760 
761         /* malloc new storage space */
762         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
763         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a);
764         new_j   = (int*)(new_a + bs2*new_nz);
765         new_i   = new_j + new_nz;
766 
767         /* copy over old data into new slots */
768         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
769         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
770         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
771         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
772         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
773         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
774         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
775         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
776         /* free up old matrix storage */
777         ierr = PetscFree(a->a);CHKERRQ(ierr);
778         if (!a->singlemalloc) {
779           ierr = PetscFree(a->i);CHKERRQ(ierr);
780           ierr = PetscFree(a->j);CHKERRQ(ierr);
781         }
782         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
783         a->singlemalloc = PETSC_TRUE;
784 
785         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
786         rmax = imax[row] = imax[row] + CHUNKSIZE;
787         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
788         a->maxnz += bs2*CHUNKSIZE;
789         a->reallocs++;
790         a->nz++;
791       }
792       N = nrow++ - 1;
793       /* shift up all the later entries in this row */
794       for (ii=N; ii>=i; ii--) {
795         rp[ii+1] = rp[ii];
796         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
797       }
798       if (N >= i) {
799         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
800       }
801       rp[i] = col;
802       bap   = ap +  bs2*i;
803       if (roworiented) {
804         for (ii=0; ii<bs; ii++,value+=stepval) {
805           for (jj=ii; jj<bs2; jj+=bs) {
806             bap[jj] = *value++;
807           }
808         }
809       } else {
810         for (ii=0; ii<bs; ii++,value+=stepval) {
811           for (jj=0; jj<bs; jj++) {
812             *bap++  = *value++;
813           }
814         }
815       }
816       noinsert2:;
817       low = i;
818     }
819     ailen[row] = nrow;
820   }
821   PetscFunctionReturn(0);
822 }
823 
824 
825 #undef __FUNC__
826 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
827 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
828 {
829   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
830   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
831   int        m = a->m,*ip,N,*ailen = a->ilen;
832   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
833   MatScalar  *aa = a->a,*ap;
834 
835   PetscFunctionBegin;
836   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
837 
838   if (m) rmax = ailen[0];
839   for (i=1; i<mbs; i++) {
840     /* move each row back by the amount of empty slots (fshift) before it*/
841     fshift += imax[i-1] - ailen[i-1];
842     rmax   = PetscMax(rmax,ailen[i]);
843     if (fshift) {
844       ip = aj + ai[i]; ap = aa + bs2*ai[i];
845       N = ailen[i];
846       for (j=0; j<N; j++) {
847         ip[j-fshift] = ip[j];
848         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
849       }
850     }
851     ai[i] = ai[i-1] + ailen[i-1];
852   }
853   if (mbs) {
854     fshift += imax[mbs-1] - ailen[mbs-1];
855     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
856   }
857   /* reset ilen and imax for each row */
858   for (i=0; i<mbs; i++) {
859     ailen[i] = imax[i] = ai[i+1] - ai[i];
860   }
861   a->nz = ai[mbs];
862 
863   /* diagonals may have moved, so kill the diagonal pointers */
864   if (fshift && a->diag) {
865     ierr = PetscFree(a->diag);CHKERRQ(ierr);
866     PLogObjectMemory(A,-(m+1)*sizeof(int));
867     a->diag = 0;
868   }
869   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
870            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
871   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
872            a->reallocs);
873   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
874   a->reallocs          = 0;
875   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
876 
877   PetscFunctionReturn(0);
878 }
879 
880 
881 
882 /*
883    This function returns an array of flags which indicate the locations of contiguous
884    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
885    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
886    Assume: sizes should be long enough to hold all the values.
887 */
888 #undef __FUNC__
889 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks"
890 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
891 {
892   int        i,j,k,row;
893   PetscTruth flg;
894 
895   PetscFunctionBegin;
896   for (i=0,j=0; i<n; j++) {
897     row = idx[i];
898     if (row%bs!=0) { /* Not the begining of a block */
899       sizes[j] = 1;
900       i++;
901     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
902       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
903       i++;
904     } else { /* Begining of the block, so check if the complete block exists */
905       flg = PETSC_TRUE;
906       for (k=1; k<bs; k++) {
907         if (row+k != idx[i+k]) { /* break in the block */
908           flg = PETSC_FALSE;
909           break;
910         }
911       }
912       if (flg == PETSC_TRUE) { /* No break in the bs */
913         sizes[j] = bs;
914         i+= bs;
915       } else {
916         sizes[j] = 1;
917         i++;
918       }
919     }
920   }
921   *bs_max = j;
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNC__
926 #define __FUNC__ "MatZeroRows_SeqBAIJ"
927 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag)
928 {
929   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
930   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
931   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
932   Scalar      zero = 0.0;
933   MatScalar   *aa;
934 
935   PetscFunctionBegin;
936   /* Make a copy of the IS and  sort it */
937   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
938   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
939 
940   /* allocate memory for rows,sizes */
941   rows  = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows);
942   sizes = rows + is_n;
943 
944   /* initialize copy IS valurs to rows, and sort them */
945   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
946   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
947   if (baij->keepzeroedrows) {
948     for (i=0; i<is_n; i++) { sizes[i] = 1; }
949     bs_max = is_n;
950   } else {
951     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
952   }
953   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
954 
955   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
956     row   = rows[j];
957     if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row);
958     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
959     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
960     if (sizes[i] == bs && !baij->keepzeroedrows) {
961       if (diag) {
962         if (baij->ilen[row/bs] > 0) {
963           baij->ilen[row/bs] = 1;
964           baij->j[baij->i[row/bs]] = row/bs;
965           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
966         }
967         /* Now insert all the diagoanl values for this bs */
968         for (k=0; k<bs; k++) {
969           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
970         }
971       } else { /* (!diag) */
972         baij->ilen[row/bs] = 0;
973       } /* end (!diag) */
974     } else { /* (sizes[i] != bs) */
975 #if defined (PETSC_USE_DEBUG)
976       if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1");
977 #endif
978       for (k=0; k<count; k++) {
979         aa[0] = zero;
980         aa+=bs;
981       }
982       if (diag) {
983         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
984       }
985     }
986   }
987 
988   ierr = PetscFree(rows);CHKERRQ(ierr);
989   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNC__
994 #define __FUNC__ "MatSetValues_SeqBAIJ"
995 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
996 {
997   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
998   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
999   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
1000   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1001   int         ridx,cidx,bs2=a->bs2,ierr;
1002   MatScalar   *ap,value,*aa=a->a,*bap;
1003 
1004   PetscFunctionBegin;
1005   for (k=0; k<m; k++) { /* loop over added rows */
1006     row  = im[k]; brow = row/bs;
1007     if (row < 0) continue;
1008 #if defined(PETSC_USE_BOPT_g)
1009     if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m);
1010 #endif
1011     rp   = aj + ai[brow];
1012     ap   = aa + bs2*ai[brow];
1013     rmax = imax[brow];
1014     nrow = ailen[brow];
1015     low  = 0;
1016     for (l=0; l<n; l++) { /* loop over added columns */
1017       if (in[l] < 0) continue;
1018 #if defined(PETSC_USE_BOPT_g)
1019       if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n);
1020 #endif
1021       col = in[l]; bcol = col/bs;
1022       ridx = row % bs; cidx = col % bs;
1023       if (roworiented) {
1024         value = v[l + k*n];
1025       } else {
1026         value = v[k + l*m];
1027       }
1028       if (!sorted) low = 0; high = nrow;
1029       while (high-low > 7) {
1030         t = (low+high)/2;
1031         if (rp[t] > bcol) high = t;
1032         else              low  = t;
1033       }
1034       for (i=low; i<high; i++) {
1035         if (rp[i] > bcol) break;
1036         if (rp[i] == bcol) {
1037           bap  = ap +  bs2*i + bs*cidx + ridx;
1038           if (is == ADD_VALUES) *bap += value;
1039           else                  *bap  = value;
1040           goto noinsert1;
1041         }
1042       }
1043       if (nonew == 1) goto noinsert1;
1044       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1045       if (nrow >= rmax) {
1046         /* there is no extra room in row, therefore enlarge */
1047         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1048         MatScalar *new_a;
1049 
1050         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1051 
1052         /* Malloc new storage space */
1053         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1054         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a);
1055         new_j   = (int*)(new_a + bs2*new_nz);
1056         new_i   = new_j + new_nz;
1057 
1058         /* copy over old data into new slots */
1059         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1060         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1061         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1062         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1063         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1064         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1065         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1066         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1067         /* free up old matrix storage */
1068         ierr = PetscFree(a->a);CHKERRQ(ierr);
1069         if (!a->singlemalloc) {
1070           ierr = PetscFree(a->i);CHKERRQ(ierr);
1071           ierr = PetscFree(a->j);CHKERRQ(ierr);
1072         }
1073         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1074         a->singlemalloc = PETSC_TRUE;
1075 
1076         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1077         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1078         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1079         a->maxnz += bs2*CHUNKSIZE;
1080         a->reallocs++;
1081         a->nz++;
1082       }
1083       N = nrow++ - 1;
1084       /* shift up all the later entries in this row */
1085       for (ii=N; ii>=i; ii--) {
1086         rp[ii+1] = rp[ii];
1087         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1088       }
1089       if (N>=i) {
1090         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1091       }
1092       rp[i]                      = bcol;
1093       ap[bs2*i + bs*cidx + ridx] = value;
1094       noinsert1:;
1095       low = i;
1096     }
1097     ailen[brow] = nrow;
1098   }
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,PetscReal,Mat*);
1103 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,PetscReal);
1104 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1105 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1106 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1107 extern int MatMultTranspose_SeqBAIJ(Mat,Vec,Vec);
1108 extern int MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1109 extern int MatScale_SeqBAIJ(Scalar*,Mat);
1110 extern int MatNorm_SeqBAIJ(Mat,NormType,PetscReal *);
1111 extern int MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*);
1112 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1113 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1114 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1115 extern int MatZeroEntries_SeqBAIJ(Mat);
1116 
1117 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1118 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1119 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1120 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1121 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1122 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1123 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
1124 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1125 extern int MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec);
1126 extern int MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec);
1127 extern int MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec);
1128 extern int MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec);
1129 extern int MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec);
1130 extern int MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec);
1131 extern int MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec);
1132 
1133 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1134 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1135 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1136 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1137 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1138 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1139 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*);
1140 
1141 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1142 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1143 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1144 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1145 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1146 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec);
1147 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1148 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1149 
1150 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1151 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1152 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1153 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1154 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1155 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
1156 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1157 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1158 
1159 #undef __FUNC__
1160 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1161 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1162 {
1163   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1164   Mat         outA;
1165   int         ierr;
1166   PetscTruth  row_identity,col_identity;
1167 
1168   PetscFunctionBegin;
1169   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU");
1170   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1171   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1172   if (!row_identity || !col_identity) {
1173     SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU");
1174   }
1175 
1176   outA          = inA;
1177   inA->factor   = FACTOR_LU;
1178 
1179   if (!a->diag) {
1180     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1181   }
1182   /*
1183       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1184       for ILU(0) factorization with natural ordering
1185   */
1186   switch (a->bs) {
1187   case 1:
1188     inA->ops->solvetranspose   = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering;
1189     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1190   case 2:
1191     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1192     inA->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1193     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering;
1194     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1195     break;
1196   case 3:
1197     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1198     inA->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1199     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_3_NaturalOrdering;
1200     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1201     break;
1202   case 4:
1203     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1204     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1205     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_4_NaturalOrdering;
1206     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1207     break;
1208   case 5:
1209     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1210     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1211     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering;
1212     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1213     break;
1214   case 6:
1215     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1216     inA->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1217     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_6_NaturalOrdering;
1218     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1219     break;
1220   case 7:
1221     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1222     inA->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering;
1223     inA->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1224     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1225     break;
1226   default:
1227     a->row        = row;
1228     a->col        = col;
1229     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1230     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1231 
1232     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1233     ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr);
1234     PLogObjectParent(inA,a->icol);
1235 
1236     if (!a->solve_work) {
1237       a->solve_work = (Scalar*)PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1238       PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1239     }
1240   }
1241 
1242   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1243 
1244   PetscFunctionReturn(0);
1245 }
1246 #undef __FUNC__
1247 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1248 int MatPrintHelp_SeqBAIJ(Mat A)
1249 {
1250   static PetscTruth called = PETSC_FALSE;
1251   MPI_Comm          comm = A->comm;
1252   int               ierr;
1253 
1254   PetscFunctionBegin;
1255   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1256   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1257   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 EXTERN_C_BEGIN
1262 #undef __FUNC__
1263 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1264 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1265 {
1266   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1267   int         i,nz,n;
1268 
1269   PetscFunctionBegin;
1270   nz = baij->maxnz;
1271   n  = baij->n;
1272   for (i=0; i<nz; i++) {
1273     baij->j[i] = indices[i];
1274   }
1275   baij->nz = nz;
1276   for (i=0; i<n; i++) {
1277     baij->ilen[i] = baij->imax[i];
1278   }
1279 
1280   PetscFunctionReturn(0);
1281 }
1282 EXTERN_C_END
1283 
1284 #undef __FUNC__
1285 #define __FUNC__ "MatSeqBAIJSetColumnIndices"
1286 /*@
1287     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1288        in the matrix.
1289 
1290   Input Parameters:
1291 +  mat - the SeqBAIJ matrix
1292 -  indices - the column indices
1293 
1294   Level: advanced
1295 
1296   Notes:
1297     This can be called if you have precomputed the nonzero structure of the
1298   matrix and want to provide it to the matrix object to improve the performance
1299   of the MatSetValues() operation.
1300 
1301     You MUST have set the correct numbers of nonzeros per row in the call to
1302   MatCreateSeqBAIJ().
1303 
1304     MUST be called before any calls to MatSetValues();
1305 
1306 @*/
1307 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1308 {
1309   int ierr,(*f)(Mat,int *);
1310 
1311   PetscFunctionBegin;
1312   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1313   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr);
1314   if (f) {
1315     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1316   } else {
1317     SETERRQ(1,1,"Wrong type of matrix to set column indices");
1318   }
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 /* -------------------------------------------------------------------*/
1323 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1324        MatGetRow_SeqBAIJ,
1325        MatRestoreRow_SeqBAIJ,
1326        MatMult_SeqBAIJ_N,
1327        MatMultAdd_SeqBAIJ_N,
1328        MatMultTranspose_SeqBAIJ,
1329        MatMultTransposeAdd_SeqBAIJ,
1330        MatSolve_SeqBAIJ_N,
1331        0,
1332        0,
1333        0,
1334        MatLUFactor_SeqBAIJ,
1335        0,
1336        0,
1337        MatTranspose_SeqBAIJ,
1338        MatGetInfo_SeqBAIJ,
1339        MatEqual_SeqBAIJ,
1340        MatGetDiagonal_SeqBAIJ,
1341        MatDiagonalScale_SeqBAIJ,
1342        MatNorm_SeqBAIJ,
1343        0,
1344        MatAssemblyEnd_SeqBAIJ,
1345        0,
1346        MatSetOption_SeqBAIJ,
1347        MatZeroEntries_SeqBAIJ,
1348        MatZeroRows_SeqBAIJ,
1349        MatLUFactorSymbolic_SeqBAIJ,
1350        MatLUFactorNumeric_SeqBAIJ_N,
1351        0,
1352        0,
1353        MatGetSize_SeqBAIJ,
1354        MatGetSize_SeqBAIJ,
1355        MatGetOwnershipRange_SeqBAIJ,
1356        MatILUFactorSymbolic_SeqBAIJ,
1357        0,
1358        0,
1359        0,
1360        MatDuplicate_SeqBAIJ,
1361        0,
1362        0,
1363        MatILUFactor_SeqBAIJ,
1364        0,
1365        0,
1366        MatGetSubMatrices_SeqBAIJ,
1367        MatIncreaseOverlap_SeqBAIJ,
1368        MatGetValues_SeqBAIJ,
1369        0,
1370        MatPrintHelp_SeqBAIJ,
1371        MatScale_SeqBAIJ,
1372        0,
1373        0,
1374        0,
1375        MatGetBlockSize_SeqBAIJ,
1376        MatGetRowIJ_SeqBAIJ,
1377        MatRestoreRowIJ_SeqBAIJ,
1378        0,
1379        0,
1380        0,
1381        0,
1382        0,
1383        0,
1384        MatSetValuesBlocked_SeqBAIJ,
1385        MatGetSubMatrix_SeqBAIJ,
1386        0,
1387        0,
1388        MatGetMaps_Petsc};
1389 
1390 EXTERN_C_BEGIN
1391 #undef __FUNC__
1392 #define __FUNC__ "MatStoreValues_SeqBAIJ"
1393 int MatStoreValues_SeqBAIJ(Mat mat)
1394 {
1395   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1396   int         nz = aij->i[aij->m]*aij->bs*aij->bs2;
1397   int         ierr;
1398 
1399   PetscFunctionBegin;
1400   if (aij->nonew != 1) {
1401     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1402   }
1403 
1404   /* allocate space for values if not already there */
1405   if (!aij->saved_values) {
1406     aij->saved_values = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values);
1407   }
1408 
1409   /* copy values over */
1410   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1411   PetscFunctionReturn(0);
1412 }
1413 EXTERN_C_END
1414 
1415 EXTERN_C_BEGIN
1416 #undef __FUNC__
1417 #define __FUNC__ "MatRetrieveValues_SeqBAIJ"
1418 int MatRetrieveValues_SeqBAIJ(Mat mat)
1419 {
1420   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1421   int         nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr;
1422 
1423   PetscFunctionBegin;
1424   if (aij->nonew != 1) {
1425     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1426   }
1427   if (!aij->saved_values) {
1428     SETERRQ(1,1,"Must call MatStoreValues(A);first");
1429   }
1430 
1431   /* copy values over */
1432   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1433   PetscFunctionReturn(0);
1434 }
1435 EXTERN_C_END
1436 
1437 #undef __FUNC__
1438 #define __FUNC__ "MatCreateSeqBAIJ"
1439 /*@C
1440    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1441    compressed row) format.  For good matrix assembly performance the
1442    user should preallocate the matrix storage by setting the parameter nz
1443    (or the array nnz).  By setting these parameters accurately, performance
1444    during matrix assembly can be increased by more than a factor of 50.
1445 
1446    Collective on MPI_Comm
1447 
1448    Input Parameters:
1449 +  comm - MPI communicator, set to PETSC_COMM_SELF
1450 .  bs - size of block
1451 .  m - number of rows
1452 .  n - number of columns
1453 .  nz - number of block nonzeros per block row (same for all rows)
1454 -  nnz - array containing the number of block nonzeros in the various block rows
1455          (possibly different for each block row) or PETSC_NULL
1456 
1457    Output Parameter:
1458 .  A - the matrix
1459 
1460    Options Database Keys:
1461 .   -mat_no_unroll - uses code that does not unroll the loops in the
1462                      block calculations (much slower)
1463 .    -mat_block_size - size of the blocks to use
1464 
1465    Level: intermediate
1466 
1467    Notes:
1468    The block AIJ format is fully compatible with standard Fortran 77
1469    storage.  That is, the stored row and column indices can begin at
1470    either one (as in Fortran) or zero.  See the users' manual for details.
1471 
1472    Specify the preallocated storage with either nz or nnz (not both).
1473    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1474    allocation.  For additional details, see the users manual chapter on
1475    matrices.
1476 
1477 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1478 @*/
1479 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1480 {
1481   Mat         B;
1482   Mat_SeqBAIJ *b;
1483   int         i,len,ierr,mbs,nbs,bs2,size;
1484   PetscTruth  flg;
1485 
1486   PetscFunctionBegin;
1487   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1488   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1");
1489 
1490   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1491   mbs  = m/bs;
1492   nbs  = n/bs;
1493   bs2  = bs*bs;
1494 
1495   if (mbs*bs!=m || nbs*bs!=n) {
1496     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize");
1497   }
1498 
1499   if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz);
1500   if (nnz) {
1501     for (i=0; i<mbs; i++) {
1502       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1503     }
1504   }
1505 
1506   *A = 0;
1507   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView);
1508   PLogObjectCreate(B);
1509   B->data = (void*)(b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b);
1510   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1511   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1512   ierr    = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1513   if (!flg) {
1514     switch (bs) {
1515     case 1:
1516       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1517       B->ops->solve           = MatSolve_SeqBAIJ_1;
1518       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_1;
1519       B->ops->mult            = MatMult_SeqBAIJ_1;
1520       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1521       break;
1522     case 2:
1523       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1524       B->ops->solve           = MatSolve_SeqBAIJ_2;
1525       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_2;
1526       B->ops->mult            = MatMult_SeqBAIJ_2;
1527       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1528       break;
1529     case 3:
1530       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1531       B->ops->solve           = MatSolve_SeqBAIJ_3;
1532       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_3;
1533       B->ops->mult            = MatMult_SeqBAIJ_3;
1534       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1535       break;
1536     case 4:
1537       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1538       B->ops->solve           = MatSolve_SeqBAIJ_4;
1539       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_4;
1540       B->ops->mult            = MatMult_SeqBAIJ_4;
1541       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1542       break;
1543     case 5:
1544       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1545       B->ops->solve           = MatSolve_SeqBAIJ_5;
1546       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_5;
1547       B->ops->mult            = MatMult_SeqBAIJ_5;
1548       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1549       break;
1550     case 6:
1551       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1552       B->ops->solve           = MatSolve_SeqBAIJ_6;
1553       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_6;
1554       B->ops->mult            = MatMult_SeqBAIJ_6;
1555       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1556       break;
1557     case 7:
1558       B->ops->mult            = MatMult_SeqBAIJ_7;
1559       B->ops->solve           = MatSolve_SeqBAIJ_7;
1560       B->ops->solvetranspose  = MatSolveTranspose_SeqBAIJ_7;
1561       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1562       break;
1563     }
1564   }
1565   B->ops->destroy     = MatDestroy_SeqBAIJ;
1566   B->ops->view        = MatView_SeqBAIJ;
1567   B->factor           = 0;
1568   B->lupivotthreshold = 1.0;
1569   B->mapping          = 0;
1570   b->row              = 0;
1571   b->col              = 0;
1572   b->icol             = 0;
1573   b->reallocs         = 0;
1574   b->saved_values     = 0;
1575 
1576   b->m       = m; B->m = m; B->M = m;
1577   b->n       = n; B->n = n; B->N = n;
1578 
1579   ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr);
1580   ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr);
1581 
1582   b->mbs     = mbs;
1583   b->nbs     = nbs;
1584   b->imax    = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(b->imax);
1585   if (!nnz) {
1586     if (nz == PETSC_DEFAULT) nz = 5;
1587     else if (nz <= 0)        nz = 1;
1588     for (i=0; i<mbs; i++) b->imax[i] = nz;
1589     nz = nz*mbs;
1590   } else {
1591     nz = 0;
1592     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1593   }
1594 
1595   /* allocate the matrix space */
1596   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int);
1597   b->a  = (MatScalar*)PetscMalloc(len);CHKPTRQ(b->a);
1598   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1599   b->j  = (int*)(b->a + nz*bs2);
1600   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1601   b->i  = b->j + nz;
1602   b->singlemalloc = PETSC_TRUE;
1603 
1604   b->i[0] = 0;
1605   for (i=1; i<mbs+1; i++) {
1606     b->i[i] = b->i[i-1] + b->imax[i-1];
1607   }
1608 
1609   /* b->ilen will count nonzeros in each block row so far. */
1610   b->ilen = (int*)PetscMalloc((mbs+1)*sizeof(int));
1611   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1612   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1613 
1614   b->bs               = bs;
1615   b->bs2              = bs2;
1616   b->mbs              = mbs;
1617   b->nz               = 0;
1618   b->maxnz            = nz*bs2;
1619   b->sorted           = PETSC_FALSE;
1620   b->roworiented      = PETSC_TRUE;
1621   b->nonew            = 0;
1622   b->diag             = 0;
1623   b->solve_work       = 0;
1624   b->mult_work        = 0;
1625   b->spptr            = 0;
1626   B->info.nz_unneeded = (PetscReal)b->maxnz;
1627   b->keepzeroedrows   = PETSC_FALSE;
1628 
1629   *A = B;
1630   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1631   if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); }
1632 
1633   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1634                                      "MatStoreValues_SeqBAIJ",
1635                                      (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1636   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1637                                      "MatRetrieveValues_SeqBAIJ",
1638                                      (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1639   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1640                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1641                                      (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1642   PetscFunctionReturn(0);
1643 }
1644 
1645 #undef __FUNC__
1646 #define __FUNC__ "MatDuplicate_SeqBAIJ"
1647 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1648 {
1649   Mat         C;
1650   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1651   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1652 
1653   PetscFunctionBegin;
1654   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix");
1655 
1656   *B = 0;
1657   PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView);
1658   PLogObjectCreate(C);
1659   C->data           = (void*)(c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c);
1660   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1661   C->ops->destroy   = MatDestroy_SeqBAIJ;
1662   C->ops->view      = MatView_SeqBAIJ;
1663   C->factor         = A->factor;
1664   c->row            = 0;
1665   c->col            = 0;
1666   c->icol           = 0;
1667   c->saved_values   = 0;
1668   c->keepzeroedrows = a->keepzeroedrows;
1669   C->assembled      = PETSC_TRUE;
1670 
1671   c->m = C->m   = a->m;
1672   c->n = C->n   = a->n;
1673   C->M          = a->m;
1674   C->N          = a->n;
1675 
1676   c->bs         = a->bs;
1677   c->bs2        = a->bs2;
1678   c->mbs        = a->mbs;
1679   c->nbs        = a->nbs;
1680 
1681   c->imax       = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax);
1682   c->ilen       = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen);
1683   for (i=0; i<mbs; i++) {
1684     c->imax[i] = a->imax[i];
1685     c->ilen[i] = a->ilen[i];
1686   }
1687 
1688   /* allocate the matrix space */
1689   c->singlemalloc = PETSC_TRUE;
1690   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1691   c->a = (MatScalar*)PetscMalloc(len);CHKPTRQ(c->a);
1692   c->j = (int*)(c->a + nz*bs2);
1693   c->i = c->j + nz;
1694   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1695   if (mbs > 0) {
1696     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1697     if (cpvalues == MAT_COPY_VALUES) {
1698       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1699     } else {
1700       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1701     }
1702   }
1703 
1704   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1705   c->sorted      = a->sorted;
1706   c->roworiented = a->roworiented;
1707   c->nonew       = a->nonew;
1708 
1709   if (a->diag) {
1710     c->diag = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->diag);
1711     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1712     for (i=0; i<mbs; i++) {
1713       c->diag[i] = a->diag[i];
1714     }
1715   } else c->diag        = 0;
1716   c->nz                 = a->nz;
1717   c->maxnz              = a->maxnz;
1718   c->solve_work         = 0;
1719   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1720   c->mult_work          = 0;
1721   *B = C;
1722   ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1723   PetscFunctionReturn(0);
1724 }
1725 
1726 #undef __FUNC__
1727 #define __FUNC__ "MatLoad_SeqBAIJ"
1728 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1729 {
1730   Mat_SeqBAIJ  *a;
1731   Mat          B;
1732   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1733   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1734   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1735   int          *masked,nmask,tmp,bs2,ishift;
1736   Scalar       *aa;
1737   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1738 
1739   PetscFunctionBegin;
1740   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1741   bs2  = bs*bs;
1742 
1743   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1744   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor");
1745   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1746   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1747   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object");
1748   M = header[1]; N = header[2]; nz = header[3];
1749 
1750   if (header[3] < 0) {
1751     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ");
1752   }
1753 
1754   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
1755 
1756   /*
1757      This code adds extra rows to make sure the number of rows is
1758     divisible by the blocksize
1759   */
1760   mbs        = M/bs;
1761   extra_rows = bs - M + bs*(mbs);
1762   if (extra_rows == bs) extra_rows = 0;
1763   else                  mbs++;
1764   if (extra_rows) {
1765     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1766   }
1767 
1768   /* read in row lengths */
1769   rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1770   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1771   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1772 
1773   /* read in column indices */
1774   jj = (int*)PetscMalloc((nz+extra_rows)*sizeof(int));CHKPTRQ(jj);
1775   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1776   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1777 
1778   /* loop over row lengths determining block row lengths */
1779   browlengths = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1780   ierr        = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1781   mask        = (int*)PetscMalloc(2*mbs*sizeof(int));CHKPTRQ(mask);
1782   ierr        = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1783   masked      = mask + mbs;
1784   rowcount    = 0; nzcount = 0;
1785   for (i=0; i<mbs; i++) {
1786     nmask = 0;
1787     for (j=0; j<bs; j++) {
1788       kmax = rowlengths[rowcount];
1789       for (k=0; k<kmax; k++) {
1790         tmp = jj[nzcount++]/bs;
1791         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1792       }
1793       rowcount++;
1794     }
1795     browlengths[i] += nmask;
1796     /* zero out the mask elements we set */
1797     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1798   }
1799 
1800   /* create our matrix */
1801   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1802   B = *A;
1803   a = (Mat_SeqBAIJ*)B->data;
1804 
1805   /* set matrix "i" values */
1806   a->i[0] = 0;
1807   for (i=1; i<= mbs; i++) {
1808     a->i[i]      = a->i[i-1] + browlengths[i-1];
1809     a->ilen[i-1] = browlengths[i-1];
1810   }
1811   a->nz         = 0;
1812   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1813 
1814   /* read in nonzero values */
1815   aa = (Scalar*)PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1816   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1817   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1818 
1819   /* set "a" and "j" values into matrix */
1820   nzcount = 0; jcount = 0;
1821   for (i=0; i<mbs; i++) {
1822     nzcountb = nzcount;
1823     nmask    = 0;
1824     for (j=0; j<bs; j++) {
1825       kmax = rowlengths[i*bs+j];
1826       for (k=0; k<kmax; k++) {
1827         tmp = jj[nzcount++]/bs;
1828 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1829       }
1830       rowcount++;
1831     }
1832     /* sort the masked values */
1833     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1834 
1835     /* set "j" values into matrix */
1836     maskcount = 1;
1837     for (j=0; j<nmask; j++) {
1838       a->j[jcount++]  = masked[j];
1839       mask[masked[j]] = maskcount++;
1840     }
1841     /* set "a" values into matrix */
1842     ishift = bs2*a->i[i];
1843     for (j=0; j<bs; j++) {
1844       kmax = rowlengths[i*bs+j];
1845       for (k=0; k<kmax; k++) {
1846         tmp       = jj[nzcountb]/bs ;
1847         block     = mask[tmp] - 1;
1848         point     = jj[nzcountb] - bs*tmp;
1849         idx       = ishift + bs2*block + j + bs*point;
1850         a->a[idx] = aa[nzcountb++];
1851       }
1852     }
1853     /* zero out the mask elements we set */
1854     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1855   }
1856   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix");
1857 
1858   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1859   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1860   ierr = PetscFree(aa);CHKERRQ(ierr);
1861   ierr = PetscFree(jj);CHKERRQ(ierr);
1862   ierr = PetscFree(mask);CHKERRQ(ierr);
1863 
1864   B->assembled = PETSC_TRUE;
1865 
1866   ierr = MatView_Private(B);CHKERRQ(ierr);
1867   PetscFunctionReturn(0);
1868 }
1869 
1870 
1871 
1872