xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 273d9f13de75c4ed17021f7f2c11eebb99d26f0d)
1 /*$Id: sbaij.c,v 1.37 2000/10/19 18:54:23 hzhang Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "petscsys.h"                            /*I "petscmat.h" I*/
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 #include "src/mat/impls/sbaij/seq/sbaij.h"
12 
13 #define CHUNKSIZE  10
14 
15 /*
16      Checks for missing diagonals
17 */
18 #undef __FUNC__
19 #define __FUNC__ "MatMissingDiagonal_SeqSBAIJ"
20 int MatMissingDiagonal_SeqSBAIJ(Mat A)
21 {
22   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
23   int         *diag,*jj = a->j,i,ierr;
24 
25   PetscFunctionBegin;
26   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
27   diag = a->diag;
28   for (i=0; i<a->mbs; i++) {
29     if (jj[diag[i]] != i) {
30       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
31     }
32   }
33   PetscFunctionReturn(0);
34 }
35 
36 #undef __FUNC__
37 #define __FUNC__ "MatMarkDiagonal_SeqSBAIJ"
38 int MatMarkDiagonal_SeqSBAIJ(Mat A)
39 {
40   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
41   int          i,j,*diag,m = a->mbs;
42 
43   PetscFunctionBegin;
44   if (a->diag) PetscFunctionReturn(0);
45 
46   diag = (int*)PetscMalloc((m+1)*sizeof(int));CHKPTRQ(diag);
47   PLogObjectMemory(A,(m+1)*sizeof(int));
48   for (i=0; i<m; i++) {
49     diag[i] = a->i[i+1];
50     for (j=a->i[i]; j<a->i[i+1]; j++) {
51       if (a->j[j] == i) {
52         diag[i] = j;
53         break;
54       }
55     }
56   }
57   a->diag = diag;
58   PetscFunctionReturn(0);
59 }
60 
61 
62 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
63 
64 #undef __FUNC__
65 #define __FUNC__ "MatGetRowIJ_SeqSBAIJ"
66 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
67 {
68   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
69 
70   PetscFunctionBegin;
71   if (ia) {
72     SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
73   }
74   *nn = a->mbs;
75   PetscFunctionReturn(0);
76 }
77 
78 #undef __FUNC__
79 #define __FUNC__ "MatRestoreRowIJ_SeqSBAIJ"
80 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
81 {
82   PetscFunctionBegin;
83   if (!ia) PetscFunctionReturn(0);
84   SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
85   /* PetscFunctionReturn(0); */
86 }
87 
88 #undef __FUNC__
89 #define __FUNC__ "MatGetBlockSize_SeqSBAIJ"
90 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
91 {
92   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
93 
94   PetscFunctionBegin;
95   *bs = sbaij->bs;
96   PetscFunctionReturn(0);
97 }
98 
99 #undef __FUNC__
100 #define __FUNC__ "MatDestroy_SeqSBAIJ"
101 int MatDestroy_SeqSBAIJ(Mat A)
102 {
103   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
104   int         ierr;
105 
106   PetscFunctionBegin;
107 #if defined(PETSC_USE_LOG)
108   PLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
109 #endif
110   ierr = PetscFree(a->a);CHKERRQ(ierr);
111   if (!a->singlemalloc) {
112     ierr = PetscFree(a->i);CHKERRQ(ierr);
113     ierr = PetscFree(a->j);CHKERRQ(ierr);
114   }
115   if (a->row) {
116     ierr = ISDestroy(a->row);CHKERRQ(ierr);
117   }
118   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
119   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
120   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
121   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
122   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
125 
126   if (a->inew){
127     ierr = PetscFree(a->inew);CHKERRQ(ierr);
128     a->inew = 0;
129   }
130   ierr = PetscFree(a);CHKERRQ(ierr);
131   PetscFunctionReturn(0);
132 }
133 
134 #undef __FUNC__
135 #define __FUNC__ "MatSetOption_SeqSBAIJ"
136 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
137 {
138   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
139 
140   PetscFunctionBegin;
141   if      (op == MAT_ROW_ORIENTED)                 a->roworiented    = PETSC_TRUE;
142   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented    = PETSC_FALSE;
143   else if (op == MAT_COLUMNS_SORTED)               a->sorted         = PETSC_TRUE;
144   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted         = PETSC_FALSE;
145   else if (op == MAT_KEEP_ZEROED_ROWS)             a->keepzeroedrows = PETSC_TRUE;
146   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew          = 1;
147   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew          = -1;
148   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew          = -2;
149   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew          = 0;
150   else if (op == MAT_ROWS_SORTED ||
151            op == MAT_ROWS_UNSORTED ||
152            op == MAT_SYMMETRIC ||
153            op == MAT_STRUCTURALLY_SYMMETRIC ||
154            op == MAT_YES_NEW_DIAGONALS ||
155            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
156            op == MAT_USE_HASH_TABLE) {
157     PLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
158   } else if (op == MAT_NO_NEW_DIAGONALS) {
159     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
160   } else {
161     SETERRQ(PETSC_ERR_SUP,"unknown option");
162   }
163   PetscFunctionReturn(0);
164 }
165 
166 #undef __FUNC__
167 #define __FUNC__ "MatGetOwnershipRange_SeqSBAIJ"
168 int MatGetOwnershipRange_SeqSBAIJ(Mat A,int *m,int *n)
169 {
170   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
171 
172   PetscFunctionBegin;
173   *m = 0;
174   *n = A->m;
175   PetscFunctionReturn(0);
176 }
177 
178 #undef __FUNC__
179 #define __FUNC__ "MatGetRow_SeqSBAIJ"
180 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,Scalar **v)
181 {
182   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
183   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
184   MatScalar    *aa,*aa_i;
185   Scalar       *v_i;
186 
187   PetscFunctionBegin;
188   bs  = a->bs;
189   ai  = a->i;
190   aj  = a->j;
191   aa  = a->a;
192   bs2 = a->bs2;
193 
194   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
195 
196   bn  = row/bs;   /* Block number */
197   bp  = row % bs; /* Block position */
198   M   = ai[bn+1] - ai[bn];
199   *ncols = bs*M;
200 
201   if (v) {
202     *v = 0;
203     if (*ncols) {
204       *v = (Scalar*)PetscMalloc((*ncols+row)*sizeof(Scalar));CHKPTRQ(*v);
205       for (i=0; i<M; i++) { /* for each block in the block row */
206         v_i  = *v + i*bs;
207         aa_i = aa + bs2*(ai[bn] + i);
208         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
209       }
210     }
211   }
212 
213   if (cols) {
214     *cols = 0;
215     if (*ncols) {
216       *cols = (int*)PetscMalloc((*ncols+row)*sizeof(int));CHKPTRQ(*cols);
217       for (i=0; i<M; i++) { /* for each block in the block row */
218         cols_i = *cols + i*bs;
219         itmp  = bs*aj[ai[bn] + i];
220         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
221       }
222     }
223   }
224 
225   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
226   /* this segment is currently removed, so only entries in the upper triangle are obtained */
227 #ifdef column_search
228   v_i    = *v    + M*bs;
229   cols_i = *cols + M*bs;
230   for (i=0; i<bn; i++){ /* for each block row */
231     M = ai[i+1] - ai[i];
232     for (j=0; j<M; j++){
233       itmp = aj[ai[i] + j];    /* block column value */
234       if (itmp == bn){
235         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
236         for (k=0; k<bs; k++) {
237           *cols_i++ = i*bs+k;
238           *v_i++    = aa_i[k];
239         }
240         *ncols += bs;
241         break;
242       }
243     }
244   }
245 #endif
246 
247   PetscFunctionReturn(0);
248 }
249 
250 #undef __FUNC__
251 #define __FUNC__ "MatRestoreRow_SeqSBAIJ"
252 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
253 {
254   int ierr;
255 
256   PetscFunctionBegin;
257   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
258   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
259   PetscFunctionReturn(0);
260 }
261 
262 #undef __FUNC__
263 #define __FUNC__ "MatTranspose_SeqSBAIJ"
264 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
265 {
266   PetscFunctionBegin;
267   SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called");
268   /* PetscFunctionReturn(0); */
269 }
270 
271 #undef __FUNC__
272 #define __FUNC__ "MatView_SeqSBAIJ_Binary"
273 static int MatView_SeqSBAIJ_Binary(Mat A,Viewer viewer)
274 {
275   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
276   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
277   Scalar      *aa;
278   FILE        *file;
279 
280   PetscFunctionBegin;
281   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
282   col_lens = (int*)PetscMalloc((4+A->m)*sizeof(int));CHKPTRQ(col_lens);
283   col_lens[0] = MAT_COOKIE;
284 
285   col_lens[1] = A->m;
286   col_lens[2] = A->m;
287   col_lens[3] = a->s_nz*bs2;
288 
289   /* store lengths of each row and write (including header) to file */
290   count = 0;
291   for (i=0; i<a->mbs; i++) {
292     for (j=0; j<bs; j++) {
293       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
294     }
295   }
296   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
297   ierr = PetscFree(col_lens);CHKERRQ(ierr);
298 
299   /* store column indices (zero start index) */
300   jj = (int*)PetscMalloc((a->s_nz+1)*bs2*sizeof(int));CHKPTRQ(jj);
301   count = 0;
302   for (i=0; i<a->mbs; i++) {
303     for (j=0; j<bs; j++) {
304       for (k=a->i[i]; k<a->i[i+1]; k++) {
305         for (l=0; l<bs; l++) {
306           jj[count++] = bs*a->j[k] + l;
307         }
308       }
309     }
310   }
311   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
312   ierr = PetscFree(jj);CHKERRQ(ierr);
313 
314   /* store nonzero values */
315   aa = (Scalar*)PetscMalloc((a->s_nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa);
316   count = 0;
317   for (i=0; i<a->mbs; i++) {
318     for (j=0; j<bs; j++) {
319       for (k=a->i[i]; k<a->i[i+1]; k++) {
320         for (l=0; l<bs; l++) {
321           aa[count++] = a->a[bs2*k + l*bs + j];
322         }
323       }
324     }
325   }
326   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
327   ierr = PetscFree(aa);CHKERRQ(ierr);
328 
329   ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
330   if (file) {
331     fprintf(file,"-matload_block_size %d\n",a->bs);
332   }
333   PetscFunctionReturn(0);
334 }
335 
336 #undef __FUNC__
337 #define __FUNC__ "MatView_SeqSBAIJ_ASCII"
338 static int MatView_SeqSBAIJ_ASCII(Mat A,Viewer viewer)
339 {
340   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
341   int         ierr,i,j,format,bs = a->bs,k,l,bs2=a->bs2;
342   char        *outputname;
343 
344   PetscFunctionBegin;
345   ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr);
346   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
347   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
348     ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
349   } else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
350     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
351   } else if (format == VIEWER_FORMAT_ASCII_COMMON) {
352     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
353     for (i=0; i<a->mbs; i++) {
354       for (j=0; j<bs; j++) {
355         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
356         for (k=a->i[i]; k<a->i[i+1]; k++) {
357           for (l=0; l<bs; l++) {
358 #if defined(PETSC_USE_COMPLEX)
359             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
360               ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
361                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
362             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
363               ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
364                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
365             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
366               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
367             }
368 #else
369             if (a->a[bs2*k + l*bs + j] != 0.0) {
370               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
371             }
372 #endif
373           }
374         }
375         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
376       }
377     }
378     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
379   } else {
380     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
381     for (i=0; i<a->mbs; i++) {
382       for (j=0; j<bs; j++) {
383         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
384         for (k=a->i[i]; k<a->i[i+1]; k++) {
385           for (l=0; l<bs; l++) {
386 #if defined(PETSC_USE_COMPLEX)
387             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
388               ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
389                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
390             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
391               ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
392                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
393             } else {
394               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
395             }
396 #else
397             ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
398 #endif
399           }
400         }
401         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
402       }
403     }
404     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
405   }
406   ierr = ViewerFlush(viewer);CHKERRQ(ierr);
407   PetscFunctionReturn(0);
408 }
409 
410 #undef __FUNC__
411 #define __FUNC__ "MatView_SeqSBAIJ_Draw_Zoom"
412 static int MatView_SeqSBAIJ_Draw_Zoom(Draw draw,void *Aa)
413 {
414   Mat          A = (Mat) Aa;
415   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)A->data;
416   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
417   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
418   MatScalar    *aa;
419   MPI_Comm     comm;
420   Viewer       viewer;
421 
422   PetscFunctionBegin;
423   /*
424       This is nasty. If this is called from an originally parallel matrix
425    then all processes call this,but only the first has the matrix so the
426    rest should return immediately.
427   */
428   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
429   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
430   if (rank) PetscFunctionReturn(0);
431 
432   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
433 
434   ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
435   DrawString(draw, .3*(xl+xr), .3*(yl+yr), DRAW_BLACK, "symmetric");
436 
437   /* loop over matrix elements drawing boxes */
438   color = DRAW_BLUE;
439   for (i=0,row=0; i<mbs; i++,row+=bs) {
440     for (j=a->i[i]; j<a->i[i+1]; j++) {
441       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
442       x_l = a->j[j]*bs; x_r = x_l + 1.0;
443       aa = a->a + j*bs2;
444       for (k=0; k<bs; k++) {
445         for (l=0; l<bs; l++) {
446           if (PetscRealPart(*aa++) >=  0.) continue;
447           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
448         }
449       }
450     }
451   }
452   color = DRAW_CYAN;
453   for (i=0,row=0; i<mbs; i++,row+=bs) {
454     for (j=a->i[i]; j<a->i[i+1]; j++) {
455       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
456       x_l = a->j[j]*bs; x_r = x_l + 1.0;
457       aa = a->a + j*bs2;
458       for (k=0; k<bs; k++) {
459         for (l=0; l<bs; l++) {
460           if (PetscRealPart(*aa++) != 0.) continue;
461           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
462         }
463       }
464     }
465   }
466 
467   color = DRAW_RED;
468   for (i=0,row=0; i<mbs; i++,row+=bs) {
469     for (j=a->i[i]; j<a->i[i+1]; j++) {
470       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
471       x_l = a->j[j]*bs; x_r = x_l + 1.0;
472       aa = a->a + j*bs2;
473       for (k=0; k<bs; k++) {
474         for (l=0; l<bs; l++) {
475           if (PetscRealPart(*aa++) <= 0.) continue;
476           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
477         }
478       }
479     }
480   }
481   PetscFunctionReturn(0);
482 }
483 
484 #undef __FUNC__
485 #define __FUNC__ "MatView_SeqSBAIJ_Draw"
486 static int MatView_SeqSBAIJ_Draw(Mat A,Viewer viewer)
487 {
488   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)A->data;
489   int          ierr;
490   PetscReal    xl,yl,xr,yr,w,h;
491   Draw         draw;
492   PetscTruth   isnull;
493 
494   PetscFunctionBegin;
495 
496   ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
497   ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
498 
499   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
500   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
501   xr += w;    yr += h;  xl = -w;     yl = -h;
502   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
503   ierr = DrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
504   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNC__
509 #define __FUNC__ "MatView_SeqSBAIJ"
510 int MatView_SeqSBAIJ(Mat A,Viewer viewer)
511 {
512   int        ierr;
513   PetscTruth issocket,isascii,isbinary,isdraw;
514 
515   PetscFunctionBegin;
516   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
517   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
518   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
519   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
520   if (issocket) {
521     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
522   } else if (isascii){
523     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
524   } else if (isbinary) {
525     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
526   } else if (isdraw) {
527     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
528   } else {
529     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
530   }
531   PetscFunctionReturn(0);
532 }
533 
534 
535 #undef __FUNC__
536 #define __FUNC__ "MatGetValues_SeqSBAIJ"
537 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
538 {
539   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
540   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
541   int        *ai = a->i,*ailen = a->ilen;
542   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
543   MatScalar  *ap,*aa = a->a,zero = 0.0;
544 
545   PetscFunctionBegin;
546   for (k=0; k<m; k++) { /* loop over rows */
547     row  = im[k]; brow = row/bs;
548     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
549     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
550     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
551     nrow = ailen[brow];
552     for (l=0; l<n; l++) { /* loop over columns */
553       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
554       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
555       col  = in[l] ;
556       bcol = col/bs;
557       cidx = col%bs;
558       ridx = row%bs;
559       high = nrow;
560       low  = 0; /* assume unsorted */
561       while (high-low > 5) {
562         t = (low+high)/2;
563         if (rp[t] > bcol) high = t;
564         else             low  = t;
565       }
566       for (i=low; i<high; i++) {
567         if (rp[i] > bcol) break;
568         if (rp[i] == bcol) {
569           *v++ = ap[bs2*i+bs*cidx+ridx];
570           goto finished;
571         }
572       }
573       *v++ = zero;
574       finished:;
575     }
576   }
577   PetscFunctionReturn(0);
578 }
579 
580 
581 #undef __FUNC__
582 #define __FUNC__ "MatSetValuesBlocked_SeqSBAIJ"
583 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
584 {
585   PetscFunctionBegin;
586   SETERRQ(1,"Function not yet written for SBAIJ format");
587   /* PetscFunctionReturn(0); */
588 }
589 
590 #undef __FUNC__
591 #define __FUNC__ "MatAssemblyEnd_SeqSBAIJ"
592 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
593 {
594   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
595   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
596   int        m = A->m,*ip,N,*ailen = a->ilen;
597   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
598   MatScalar  *aa = a->a,*ap;
599 
600   PetscFunctionBegin;
601   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
602 
603   if (m) rmax = ailen[0];
604   for (i=1; i<mbs; i++) {
605     /* move each row back by the amount of empty slots (fshift) before it*/
606     fshift += imax[i-1] - ailen[i-1];
607     rmax   = PetscMax(rmax,ailen[i]);
608     if (fshift) {
609       ip = aj + ai[i]; ap = aa + bs2*ai[i];
610       N = ailen[i];
611       for (j=0; j<N; j++) {
612         ip[j-fshift] = ip[j];
613         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
614       }
615     }
616     ai[i] = ai[i-1] + ailen[i-1];
617   }
618   if (mbs) {
619     fshift += imax[mbs-1] - ailen[mbs-1];
620     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
621   }
622   /* reset ilen and imax for each row */
623   for (i=0; i<mbs; i++) {
624     ailen[i] = imax[i] = ai[i+1] - ai[i];
625   }
626   a->s_nz = ai[mbs];
627 
628   /* diagonals may have moved, so kill the diagonal pointers */
629   if (fshift && a->diag) {
630     ierr = PetscFree(a->diag);CHKERRQ(ierr);
631     PLogObjectMemory(A,-(m+1)*sizeof(int));
632     a->diag = 0;
633   }
634   PLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
635            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
636   PLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
637            a->reallocs);
638   PLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
639   a->reallocs          = 0;
640   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
641 
642   PetscFunctionReturn(0);
643 }
644 
645 /*
646    This function returns an array of flags which indicate the locations of contiguous
647    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
648    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
649    Assume: sizes should be long enough to hold all the values.
650 */
651 #undef __FUNC__
652 #define __FUNC__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
653 static int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
654 {
655   int        i,j,k,row;
656   PetscTruth flg;
657 
658   PetscFunctionBegin;
659   for (i=0,j=0; i<n; j++) {
660     row = idx[i];
661     if (row%bs!=0) { /* Not the begining of a block */
662       sizes[j] = 1;
663       i++;
664     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
665       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
666       i++;
667     } else { /* Begining of the block, so check if the complete block exists */
668       flg = PETSC_TRUE;
669       for (k=1; k<bs; k++) {
670         if (row+k != idx[i+k]) { /* break in the block */
671           flg = PETSC_FALSE;
672           break;
673         }
674       }
675       if (flg == PETSC_TRUE) { /* No break in the bs */
676         sizes[j] = bs;
677         i+= bs;
678       } else {
679         sizes[j] = 1;
680         i++;
681       }
682     }
683   }
684   *bs_max = j;
685   PetscFunctionReturn(0);
686 }
687 
688 #undef __FUNC__
689 #define __FUNC__ "MatZeroRows_SeqSBAIJ"
690 int MatZeroRows_SeqSBAIJ(Mat A,IS is,Scalar *diag)
691 {
692   Mat_SeqSBAIJ *sbaij=(Mat_SeqSBAIJ*)A->data;
693   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
694   int         bs=sbaij->bs,bs2=sbaij->bs2,*sizes,row,bs_max;
695   Scalar      zero = 0.0;
696   MatScalar   *aa;
697 
698   PetscFunctionBegin;
699   /* Make a copy of the IS and  sort it */
700   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
701   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
702 
703   /* allocate memory for rows,sizes */
704   rows  = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows);
705   sizes = rows + is_n;
706 
707   /* initialize copy IS values to rows, and sort them */
708   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
709   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
710   if (sbaij->keepzeroedrows) { /* do not change nonzero structure */
711     for (i=0; i<is_n; i++) { sizes[i] = 1; } /* sizes: size of blocks, = 1 or bs */
712     bs_max = is_n;            /* bs_max: num. of contiguous block row in the row */
713   } else {
714     ierr = MatZeroRows_SeqSBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
715   }
716   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
717 
718   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
719     row   = rows[j];                  /* row to be zeroed */
720     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
721     count = (sbaij->i[row/bs +1] - sbaij->i[row/bs])*bs; /* num. of elements in the row */
722     aa    = sbaij->a + sbaij->i[row/bs]*bs2 + (row%bs);
723     if (sizes[i] == bs && !sbaij->keepzeroedrows) {
724       if (diag) {
725         if (sbaij->ilen[row/bs] > 0) {
726           sbaij->ilen[row/bs] = 1;
727           sbaij->j[sbaij->i[row/bs]] = row/bs;
728           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
729         }
730         /* Now insert all the diagoanl values for this bs */
731         for (k=0; k<bs; k++) {
732           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
733         }
734       } else { /* (!diag) */
735         sbaij->ilen[row/bs] = 0;
736       } /* end (!diag) */
737     } else { /* (sizes[i] != bs), broken block */
738 #if defined (PETSC_USE_DEBUG)
739       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
740 #endif
741       for (k=0; k<count; k++) {
742         aa[0] = zero;
743         aa+=bs;
744       }
745       if (diag) {
746         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
747       }
748     }
749   }
750 
751   ierr = PetscFree(rows);CHKERRQ(ierr);
752   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
753   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
754   PetscFunctionReturn(0);
755 }
756 
757 /* Only add/insert a(i,j) with i<=j (blocks).
758    Any a(i,j) with i>j input by user is ingored.
759 */
760 
761 #undef __FUNC__
762 #define __FUNC__ "MatSetValues_SeqSBAIJ"
763 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
764 {
765   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
766   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
767   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
768   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
769   int         ridx,cidx,bs2=a->bs2,ierr;
770   MatScalar   *ap,value,*aa=a->a,*bap;
771 
772   PetscFunctionBegin;
773 
774   for (k=0; k<m; k++) { /* loop over added rows */
775     row  = im[k];       /* row number */
776     brow = row/bs;      /* block row number */
777     if (row < 0) continue;
778 #if defined(PETSC_USE_BOPT_g)
779     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
780 #endif
781     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
782     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
783     rmax = imax[brow];  /* maximum space allocated for this row */
784     nrow = ailen[brow]; /* actual length of this row */
785     low  = 0;
786 
787     for (l=0; l<n; l++) { /* loop over added columns */
788       if (in[l] < 0) continue;
789 #if defined(PETSC_USE_BOPT_g)
790       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m);
791 #endif
792       col = in[l];
793       bcol = col/bs;              /* block col number */
794 
795       if (brow <= bcol){
796         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
797         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
798           /* element value a(k,l) */
799           if (roworiented) {
800             value = v[l + k*n];
801           } else {
802             value = v[k + l*m];
803           }
804 
805           /* move pointer bap to a(k,l) quickly and add/insert value */
806           if (!sorted) low = 0; high = nrow;
807           while (high-low > 7) {
808             t = (low+high)/2;
809             if (rp[t] > bcol) high = t;
810             else              low  = t;
811           }
812           for (i=low; i<high; i++) {
813             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
814             if (rp[i] > bcol) break;
815             if (rp[i] == bcol) {
816               bap  = ap +  bs2*i + bs*cidx + ridx;
817               if (is == ADD_VALUES) *bap += value;
818               else                  *bap  = value;
819               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
820               if (brow == bcol && ridx < cidx){
821                 bap  = ap +  bs2*i + bs*ridx + cidx;
822                 if (is == ADD_VALUES) *bap += value;
823                 else                  *bap  = value;
824               }
825               goto noinsert1;
826             }
827           }
828 
829       if (nonew == 1) goto noinsert1;
830       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
831       if (nrow >= rmax) {
832         /* there is no extra room in row, therefore enlarge */
833         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
834         MatScalar *new_a;
835 
836         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
837 
838         /* Malloc new storage space */
839         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
840         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a);
841         new_j   = (int*)(new_a + bs2*new_nz);
842         new_i   = new_j + new_nz;
843 
844         /* copy over old data into new slots */
845         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
846         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
847         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
848         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
849         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
850         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
851         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
852         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
853         /* free up old matrix storage */
854         ierr = PetscFree(a->a);CHKERRQ(ierr);
855         if (!a->singlemalloc) {
856           ierr = PetscFree(a->i);CHKERRQ(ierr);
857           ierr = PetscFree(a->j);CHKERRQ(ierr);
858         }
859         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
860         a->singlemalloc = PETSC_TRUE;
861 
862         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
863         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
864         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
865         a->s_maxnz += bs2*CHUNKSIZE;
866         a->reallocs++;
867         a->s_nz++;
868       }
869 
870       N = nrow++ - 1;
871       /* shift up all the later entries in this row */
872       for (ii=N; ii>=i; ii--) {
873         rp[ii+1] = rp[ii];
874         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
875       }
876       if (N>=i) {
877         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
878       }
879       rp[i]                      = bcol;
880       ap[bs2*i + bs*cidx + ridx] = value;
881       noinsert1:;
882       low = i;
883       /* } */
884         }
885       } /* end of if .. if..  */
886     }                     /* end of loop over added columns */
887     ailen[brow] = nrow;
888   }                       /* end of loop over added rows */
889 
890   PetscFunctionReturn(0);
891 }
892 
893 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*);
894 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal);
895 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int);
896 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
897 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
898 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
899 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
900 extern int MatScale_SeqSBAIJ(Scalar*,Mat);
901 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
902 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
903 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
904 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
905 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
906 extern int MatZeroEntries_SeqSBAIJ(Mat);
907 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
908 
909 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
910 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
911 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
912 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
913 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
914 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
915 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
916 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
917 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
918 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
919 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
920 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
921 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
922 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
923 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
924 
925 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
926 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
927 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
928 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
929 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
930 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
931 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
932 
933 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
934 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
935 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
936 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
937 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
938 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
939 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
940 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
941 
942 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
943 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
944 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
945 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
946 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
947 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
948 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
949 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
950 
951 #ifdef HAVE_MatIncompleteCholeskyFactor
952 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
953 #undef __FUNC__
954 #define __FUNC__ "MatIncompleteCholeskyFactor_SeqSBAIJ"
955 int MatIncompleteCholeskyFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level)
956 {
957   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
958   Mat         outA;
959   int         ierr;
960   PetscTruth  row_identity,col_identity;
961 
962   PetscFunctionBegin;
963   /*
964   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
965   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
966   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
967   if (!row_identity || !col_identity) {
968     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
969   }
970   */
971 
972   outA          = inA;
973   inA->factor   = FACTOR_LU;
974 
975   if (!a->diag) {
976     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
977   }
978   /*
979       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
980       for ILU(0) factorization with natural ordering
981   */
982   switch (a->bs) {
983   case 1:
984     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_2_NaturalOrdering; /* 2? */
985     PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
986   case 2:
987     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
988     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
989     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
990     PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
991     break;
992   case 3:
993     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
994     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
995     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
996     PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
997     break;
998   case 4:
999     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1000     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1001     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1002     PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1003     break;
1004   case 5:
1005     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1006     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1007     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1008     PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1009     break;
1010   case 6:
1011     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1012     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1013     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1014     PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1015     break;
1016   case 7:
1017     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1018     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1019     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1020     PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1021     break;
1022   default:
1023     a->row        = row;
1024     a->icol       = col;
1025     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1026     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1027 
1028     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1029     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1030     PLogObjectParent(inA,a->icol);
1031 
1032     if (!a->solve_work) {
1033       a->solve_work = (Scalar*)PetscMalloc((A->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1034       PLogObjectMemory(inA,(A->m+a->bs)*sizeof(Scalar));
1035     }
1036   }
1037 
1038   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1039 
1040   PetscFunctionReturn(0);
1041 }
1042 #endif
1043 
1044 #undef __FUNC__
1045 #define __FUNC__ "MatPrintHelp_SeqSBAIJ"
1046 int MatPrintHelp_SeqSBAIJ(Mat A)
1047 {
1048   static PetscTruth called = PETSC_FALSE;
1049   MPI_Comm          comm = A->comm;
1050   int               ierr;
1051 
1052   PetscFunctionBegin;
1053   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1054   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1055   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1056   PetscFunctionReturn(0);
1057 }
1058 
1059 EXTERN_C_BEGIN
1060 #undef __FUNC__
1061 #define __FUNC__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1062 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1063 {
1064   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1065   int         i,nz,n;
1066 
1067   PetscFunctionBegin;
1068   nz = baij->s_maxnz;
1069   n  = mat->n;
1070   for (i=0; i<nz; i++) {
1071     baij->j[i] = indices[i];
1072   }
1073   baij->s_nz = nz;
1074   for (i=0; i<n; i++) {
1075     baij->ilen[i] = baij->imax[i];
1076   }
1077 
1078   PetscFunctionReturn(0);
1079 }
1080 EXTERN_C_END
1081 
1082 #undef __FUNC__
1083 #define __FUNC__ "MatSeqSBAIJSetColumnIndices"
1084 /*@
1085     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1086        in the matrix.
1087 
1088   Input Parameters:
1089 +  mat     - the SeqSBAIJ matrix
1090 -  indices - the column indices
1091 
1092   Level: advanced
1093 
1094   Notes:
1095     This can be called if you have precomputed the nonzero structure of the
1096   matrix and want to provide it to the matrix object to improve the performance
1097   of the MatSetValues() operation.
1098 
1099     You MUST have set the correct numbers of nonzeros per row in the call to
1100   MatCreateSeqSBAIJ().
1101 
1102     MUST be called before any calls to MatSetValues()
1103 
1104 .seealso: MatCreateSeqSBAIJ
1105 @*/
1106 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1107 {
1108   int ierr,(*f)(Mat,int *);
1109 
1110   PetscFunctionBegin;
1111   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1112   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr);
1113   if (f) {
1114     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1115   } else {
1116     SETERRQ(1,"Wrong type of matrix to set column indices");
1117   }
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNC__
1122 #define __FUNC__ /*<a name="MatSetUpPreallocation_SeqSBAIJ"></a>*/"MatSetUpPreallocation_SeqSBAIJ"
1123 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1124 {
1125   int        ierr;
1126 
1127   PetscFunctionBegin;
1128   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 /* -------------------------------------------------------------------*/
1133 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1134        MatGetRow_SeqSBAIJ,
1135        MatRestoreRow_SeqSBAIJ,
1136        MatMult_SeqSBAIJ_N,
1137        MatMultAdd_SeqSBAIJ_N,
1138        MatMultTranspose_SeqSBAIJ,
1139        MatMultTransposeAdd_SeqSBAIJ,
1140        MatSolve_SeqSBAIJ_N,
1141        0,
1142        0,
1143        0,
1144        0,
1145        MatCholeskyFactor_SeqSBAIJ,
1146        0,
1147        MatTranspose_SeqSBAIJ,
1148        MatGetInfo_SeqSBAIJ,
1149        MatEqual_SeqSBAIJ,
1150        MatGetDiagonal_SeqSBAIJ,
1151        MatDiagonalScale_SeqSBAIJ,
1152        MatNorm_SeqSBAIJ,
1153        0,
1154        MatAssemblyEnd_SeqSBAIJ,
1155        0,
1156        MatSetOption_SeqSBAIJ,
1157        MatZeroEntries_SeqSBAIJ,
1158        MatZeroRows_SeqSBAIJ,
1159        0,
1160        0,
1161        MatCholeskyFactorSymbolic_SeqSBAIJ,
1162        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1163        MatSetUpPreallocation_SeqSBAIJ,
1164        0,
1165        MatGetOwnershipRange_SeqSBAIJ,
1166        0,
1167        MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ,
1168        0,
1169        0,
1170        MatDuplicate_SeqSBAIJ,
1171        0,
1172        0,
1173        0,
1174        0,
1175        0,
1176        MatGetSubMatrices_SeqSBAIJ,
1177        MatIncreaseOverlap_SeqSBAIJ,
1178        MatGetValues_SeqSBAIJ,
1179        0,
1180        MatPrintHelp_SeqSBAIJ,
1181        MatScale_SeqSBAIJ,
1182        0,
1183        0,
1184        0,
1185        MatGetBlockSize_SeqSBAIJ,
1186        MatGetRowIJ_SeqSBAIJ,
1187        MatRestoreRowIJ_SeqSBAIJ,
1188        0,
1189        0,
1190        0,
1191        0,
1192        0,
1193        0,
1194        MatSetValuesBlocked_SeqSBAIJ,
1195        MatGetSubMatrix_SeqSBAIJ,
1196        0,
1197        0,
1198        MatGetMaps_Petsc,
1199        0,
1200        0,
1201        0,
1202        0,
1203        0,
1204        0,
1205        MatGetRowMax_SeqSBAIJ};
1206 
1207 EXTERN_C_BEGIN
1208 #undef __FUNC__
1209 #define __FUNC__ "MatStoreValues_SeqSBAIJ"
1210 int MatStoreValues_SeqSBAIJ(Mat mat)
1211 {
1212   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1213   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1214   int         ierr;
1215 
1216   PetscFunctionBegin;
1217   if (aij->nonew != 1) {
1218     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1219   }
1220 
1221   /* allocate space for values if not already there */
1222   if (!aij->saved_values) {
1223     aij->saved_values = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values);
1224   }
1225 
1226   /* copy values over */
1227   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 EXTERN_C_END
1231 
1232 EXTERN_C_BEGIN
1233 #undef __FUNC__
1234 #define __FUNC__ "MatRetrieveValues_SeqSBAIJ"
1235 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1236 {
1237   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1238   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1239 
1240   PetscFunctionBegin;
1241   if (aij->nonew != 1) {
1242     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1243   }
1244   if (!aij->saved_values) {
1245     SETERRQ(1,"Must call MatStoreValues(A);first");
1246   }
1247 
1248   /* copy values over */
1249   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 EXTERN_C_END
1253 
1254 EXTERN_C_BEGIN
1255 #undef __FUNC__
1256 #define __FUNC__ "MatCreate_SeqSBAIJ"
1257 int MatCreate_SeqSBAIJ(Mat B)
1258 {
1259   Mat_SeqSBAIJ *b;
1260   int          i,ierr,size;
1261   PetscTruth   flg;
1262   int          s_nz;
1263 
1264   PetscFunctionBegin;
1265   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1266   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1267   B->m = B->M = PetscMax(B->m,B->M);
1268   B->n = B->N = PetscMax(B->n,B->N);
1269 
1270   B->data = (void*)(b = PetscNew(Mat_SeqSBAIJ));CHKPTRQ(b);
1271   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1272   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1273   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1274   B->ops->view        = MatView_SeqSBAIJ;
1275   B->factor           = 0;
1276   B->lupivotthreshold = 1.0;
1277   B->mapping          = 0;
1278   b->row              = 0;
1279   b->icol             = 0;
1280   b->reallocs         = 0;
1281   b->saved_values     = 0;
1282 
1283   ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1284   ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1285 
1286   b->sorted           = PETSC_FALSE;
1287   b->roworiented      = PETSC_TRUE;
1288   b->nonew            = 0;
1289   b->diag             = 0;
1290   b->solve_work       = 0;
1291   b->mult_work        = 0;
1292   b->spptr            = 0;
1293   b->keepzeroedrows   = PETSC_FALSE;
1294 
1295   b->inew             = 0;
1296   b->jnew             = 0;
1297   b->anew             = 0;
1298   b->a2anew           = 0;
1299   b->permute          = PETSC_FALSE;
1300 
1301   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1302                                      "MatStoreValues_SeqSBAIJ",
1303                                      (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1305                                      "MatRetrieveValues_SeqSBAIJ",
1306                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1308                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1309                                      (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1310   PetscFunctionReturn(0);
1311 }
1312 EXTERN_C_END
1313 
1314 #undef __FUNC__
1315 #define __FUNC__ "MatSeqSBAIJSetPreallocation"
1316 /*@C
1317    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1318    compressed row) format.  For good matrix assembly performance the
1319    user should preallocate the matrix storage by setting the parameter nz
1320    (or the array nnz).  By setting these parameters accurately, performance
1321    during matrix assembly can be increased by more than a factor of 50.
1322 
1323    Collective on Mat
1324 
1325    Input Parameters:
1326 +  A - the symmetric matrix
1327 .  bs - size of block
1328 .  nz - number of block nonzeros per block row (same for all rows)
1329 -  nnz - array containing the number of block nonzeros in the various block rows
1330          (possibly different for each block row) or PETSC_NULL
1331 
1332    Options Database Keys:
1333 .   -mat_no_unroll - uses code that does not unroll the loops in the
1334                      block calculations (much slower)
1335 .    -mat_block_size - size of the blocks to use
1336 
1337    Level: intermediate
1338 
1339    Notes:
1340    The block AIJ format is compatible with standard Fortran 77
1341    storage.  That is, the stored row and column indices can begin at
1342    either one (as in Fortran) or zero.  See the users' manual for details.
1343 
1344    Specify the preallocated storage with either nz or nnz (not both).
1345    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1346    allocation.  For additional details, see the users manual chapter on
1347    matrices.
1348 
1349 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1350 @*/
1351 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1352 {
1353   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1354   int          i,len,ierr,mbs,bs2,size;
1355   PetscTruth   flg;
1356   int          s_nz;
1357 
1358   PetscFunctionBegin;
1359   B->preallocated = PETSC_TRUE;
1360   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1361   mbs  = B->m/bs;
1362   bs2  = bs*bs;
1363 
1364   if (mbs*bs != B->m) {
1365     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1366   }
1367 
1368   if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than -2: value %d",nz);
1369   if (nnz) {
1370     for (i=0; i<mbs; i++) {
1371       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1372     }
1373   }
1374 
1375   ierr    = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1376   if (!flg) {
1377     switch (bs) {
1378     case 1:
1379       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1380       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1381       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1382       B->ops->mult            = MatMult_SeqSBAIJ_1;
1383       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1384       break;
1385     case 2:
1386       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1387       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1388       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1389       B->ops->mult            = MatMult_SeqSBAIJ_2;
1390       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1391       break;
1392     case 3:
1393       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1394       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1395       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1396       B->ops->mult            = MatMult_SeqSBAIJ_3;
1397       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1398       break;
1399     case 4:
1400       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1401       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1402       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1403       B->ops->mult            = MatMult_SeqSBAIJ_4;
1404       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1405       break;
1406     case 5:
1407       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1408       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1409       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1410       B->ops->mult            = MatMult_SeqSBAIJ_5;
1411       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1412       break;
1413     case 6:
1414       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1415       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1416       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1417       B->ops->mult            = MatMult_SeqSBAIJ_6;
1418       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1419       break;
1420     case 7:
1421       B->ops->mult            = MatMult_SeqSBAIJ_7;
1422       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1423       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1424       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1425       break;
1426     }
1427   }
1428 
1429   b->mbs     = mbs;
1430   b->nbs     = mbs;
1431   b->imax    = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(b->imax);
1432   if (!nnz) {
1433     if (nz == PETSC_DEFAULT) nz = 5;
1434     else if (nz <= 0)        nz = 1;
1435     for (i=0; i<mbs; i++) {
1436       b->imax[i] = nz;
1437     }
1438     nz = nz*mbs; /* total nz */
1439   } else {
1440     nz = 0;
1441     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1442   }
1443   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1444   s_nz = nz;
1445 
1446   /* allocate the matrix space */
1447   len   = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1448   b->a  = (MatScalar*)PetscMalloc(len);CHKPTRQ(b->a);
1449   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1450   b->j  = (int*)(b->a + s_nz*bs2);
1451   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1452   b->i  = b->j + s_nz;
1453   b->singlemalloc = PETSC_TRUE;
1454 
1455   /* pointer to beginning of each row */
1456   b->i[0] = 0;
1457   for (i=1; i<mbs+1; i++) {
1458     b->i[i] = b->i[i-1] + b->imax[i-1];
1459   }
1460 
1461   /* b->ilen will count nonzeros in each block row so far. */
1462   b->ilen = (int*)PetscMalloc((mbs+1)*sizeof(int));
1463   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1464   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1465 
1466   b->bs               = bs;
1467   b->bs2              = bs2;
1468   b->s_nz             = 0;
1469   b->s_maxnz          = s_nz*bs2;
1470 
1471   b->inew             = 0;
1472   b->jnew             = 0;
1473   b->anew             = 0;
1474   b->a2anew           = 0;
1475   b->permute          = PETSC_FALSE;
1476   PetscFunctionReturn(0);
1477 }
1478 
1479 
1480 #undef __FUNC__
1481 #define __FUNC__ "MatCreateSeqSBAIJ"
1482 /*@C
1483    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1484    compressed row) format.  For good matrix assembly performance the
1485    user should preallocate the matrix storage by setting the parameter nz
1486    (or the array nnz).  By setting these parameters accurately, performance
1487    during matrix assembly can be increased by more than a factor of 50.
1488 
1489    Collective on MPI_Comm
1490 
1491    Input Parameters:
1492 +  comm - MPI communicator, set to PETSC_COMM_SELF
1493 .  bs - size of block
1494 .  m - number of rows, or number of columns
1495 .  nz - number of block nonzeros per block row (same for all rows)
1496 -  nnz - array containing the number of block nonzeros in the various block rows
1497          (possibly different for each block row) or PETSC_NULL
1498 
1499    Output Parameter:
1500 .  A - the symmetric matrix
1501 
1502    Options Database Keys:
1503 .   -mat_no_unroll - uses code that does not unroll the loops in the
1504                      block calculations (much slower)
1505 .    -mat_block_size - size of the blocks to use
1506 
1507    Level: intermediate
1508 
1509    Notes:
1510    The block AIJ format is compatible with standard Fortran 77
1511    storage.  That is, the stored row and column indices can begin at
1512    either one (as in Fortran) or zero.  See the users' manual for details.
1513 
1514    Specify the preallocated storage with either nz or nnz (not both).
1515    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1516    allocation.  For additional details, see the users manual chapter on
1517    matrices.
1518 
1519 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1520 @*/
1521 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1522 {
1523   int ierr;
1524 
1525   PetscFunctionBegin;
1526   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1527   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1528   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNC__
1533 #define __FUNC__ "MatDuplicate_SeqSBAIJ"
1534 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1535 {
1536   Mat         C;
1537   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1538   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1539 
1540   PetscFunctionBegin;
1541   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1542 
1543   *B = 0;
1544   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&C);CHKERRQ(ierr);
1545   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1546   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1547   C->preallocated   = PETSC_TRUE;
1548   C->factor         = A->factor;
1549   c->row            = 0;
1550   c->icol           = 0;
1551   c->saved_values   = 0;
1552   c->keepzeroedrows = a->keepzeroedrows;
1553   C->assembled      = PETSC_TRUE;
1554 
1555   c->bs         = a->bs;
1556   c->bs2        = a->bs2;
1557   c->mbs        = a->mbs;
1558   c->nbs        = a->nbs;
1559 
1560   c->imax       = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax);
1561   c->ilen       = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen);
1562   for (i=0; i<mbs; i++) {
1563     c->imax[i] = a->imax[i];
1564     c->ilen[i] = a->ilen[i];
1565   }
1566 
1567   /* allocate the matrix space */
1568   c->singlemalloc = PETSC_TRUE;
1569   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1570   c->a = (MatScalar*)PetscMalloc(len);CHKPTRQ(c->a);
1571   c->j = (int*)(c->a + nz*bs2);
1572   c->i = c->j + nz;
1573   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1574   if (mbs > 0) {
1575     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1576     if (cpvalues == MAT_COPY_VALUES) {
1577       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1578     } else {
1579       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1580     }
1581   }
1582 
1583   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1584   c->sorted      = a->sorted;
1585   c->roworiented = a->roworiented;
1586   c->nonew       = a->nonew;
1587 
1588   if (a->diag) {
1589     c->diag = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->diag);
1590     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1591     for (i=0; i<mbs; i++) {
1592       c->diag[i] = a->diag[i];
1593     }
1594   } else c->diag        = 0;
1595   c->s_nz               = a->s_nz;
1596   c->s_maxnz            = a->s_maxnz;
1597   c->solve_work         = 0;
1598   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1599   c->mult_work          = 0;
1600   *B = C;
1601   ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1602   PetscFunctionReturn(0);
1603 }
1604 
1605 EXTERN_C_BEGIN
1606 #undef __FUNC__
1607 #define __FUNC__ "MatLoad_SeqSBAIJ"
1608 int MatLoad_SeqSBAIJ(Viewer viewer,MatType type,Mat *A)
1609 {
1610   Mat_SeqSBAIJ  *a;
1611   Mat          B;
1612   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1613   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,*s_browlengths,maskcount;
1614   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1615   int          *masked,nmask,tmp,bs2,ishift;
1616   Scalar       *aa;
1617   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1618 
1619   PetscFunctionBegin;
1620   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1621   bs2  = bs*bs;
1622 
1623   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1624   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1625   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1626   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1627   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1628   M = header[1]; N = header[2]; nz = header[3];
1629 
1630   if (header[3] < 0) {
1631     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1632   }
1633 
1634   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1635 
1636   /*
1637      This code adds extra rows to make sure the number of rows is
1638     divisible by the blocksize
1639   */
1640   mbs        = M/bs;
1641   extra_rows = bs - M + bs*(mbs);
1642   if (extra_rows == bs) extra_rows = 0;
1643   else                  mbs++;
1644   if (extra_rows) {
1645     PLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1646   }
1647 
1648   /* read in row lengths */
1649   rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1650   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1651   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1652 
1653   /* read in column indices */
1654   jj = (int*)PetscMalloc((nz+extra_rows)*sizeof(int));CHKPTRQ(jj);
1655   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1656   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1657 
1658   /* loop over row lengths determining block row lengths */
1659   browlengths = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1660   s_browlengths = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(s_browlengths);
1661   ierr        = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1662   mask        = (int*)PetscMalloc(2*mbs*sizeof(int));CHKPTRQ(mask);
1663   ierr        = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1664   masked      = mask + mbs;
1665   rowcount    = 0; nzcount = 0;
1666   for (i=0; i<mbs; i++) {
1667     nmask = 0;
1668     for (j=0; j<bs; j++) {
1669       kmax = rowlengths[rowcount];
1670       for (k=0; k<kmax; k++) {
1671         tmp = jj[nzcount++]/bs;   /* block col. index */
1672         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1673       }
1674       rowcount++;
1675     }
1676     s_browlengths[i] += nmask;
1677     browlengths[i]    = 2*s_browlengths[i];
1678 
1679     /* zero out the mask elements we set */
1680     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1681   }
1682 
1683   /* create our matrix */
1684   ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1685   B = *A;
1686   a = (Mat_SeqSBAIJ*)B->data;
1687 
1688   /* set matrix "i" values */
1689   a->i[0] = 0;
1690   for (i=1; i<= mbs; i++) {
1691     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1692     a->ilen[i-1] = s_browlengths[i-1];
1693   }
1694   a->s_nz         = 0;
1695   for (i=0; i<mbs; i++) a->s_nz += s_browlengths[i];
1696 
1697   /* read in nonzero values */
1698   aa = (Scalar*)PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1699   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1700   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1701 
1702   /* set "a" and "j" values into matrix */
1703   nzcount = 0; jcount = 0;
1704   for (i=0; i<mbs; i++) {
1705     nzcountb = nzcount;
1706     nmask    = 0;
1707     for (j=0; j<bs; j++) {
1708       kmax = rowlengths[i*bs+j];
1709       for (k=0; k<kmax; k++) {
1710         tmp = jj[nzcount++]/bs; /* block col. index */
1711         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1712       }
1713       rowcount++;
1714     }
1715     /* sort the masked values */
1716     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1717 
1718     /* set "j" values into matrix */
1719     maskcount = 1;
1720     for (j=0; j<nmask; j++) {
1721       a->j[jcount++]  = masked[j];
1722       mask[masked[j]] = maskcount++;
1723     }
1724 
1725     /* set "a" values into matrix */
1726     ishift = bs2*a->i[i];
1727     for (j=0; j<bs; j++) {
1728       kmax = rowlengths[i*bs+j];
1729       for (k=0; k<kmax; k++) {
1730         tmp       = jj[nzcountb]/bs ; /* block col. index */
1731         if (tmp >= i){
1732           block     = mask[tmp] - 1;
1733           point     = jj[nzcountb] - bs*tmp;
1734           idx       = ishift + bs2*block + j + bs*point;
1735           a->a[idx] = aa[nzcountb];
1736         }
1737         nzcountb++;
1738       }
1739     }
1740     /* zero out the mask elements we set */
1741     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1742   }
1743   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1744 
1745   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1746   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1747   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1748   ierr = PetscFree(aa);CHKERRQ(ierr);
1749   ierr = PetscFree(jj);CHKERRQ(ierr);
1750   ierr = PetscFree(mask);CHKERRQ(ierr);
1751 
1752   B->assembled = PETSC_TRUE;
1753   ierr = MatView_Private(B);CHKERRQ(ierr);
1754   PetscFunctionReturn(0);
1755 }
1756 EXTERN_C_END
1757 
1758 
1759 
1760