xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 11af2f92ec45033ee96dfd8ef43995ce1c31cf8e)
1 /*$Id: sbaij.c,v 1.48 2001/01/20 03:35:00 bsmith Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "petscsys.h"                            /*I "petscmat.h" I*/
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 #include "src/mat/impls/sbaij/seq/sbaij.h"
12 
13 #define CHUNKSIZE  10
14 
15 /*
16      Checks for missing diagonals
17 */
18 #undef __FUNC__
19 #define __FUNC__ "MatMissingDiagonal_SeqSBAIJ"
20 int MatMissingDiagonal_SeqSBAIJ(Mat A)
21 {
22   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
23   int         *diag,*jj = a->j,i,ierr;
24 
25   PetscFunctionBegin;
26   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
27   diag = a->diag;
28   for (i=0; i<a->mbs; i++) {
29     if (jj[diag[i]] != i) {
30       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
31     }
32   }
33   PetscFunctionReturn(0);
34 }
35 
36 #undef __FUNC__
37 #define __FUNC__ "MatMarkDiagonal_SeqSBAIJ"
38 int MatMarkDiagonal_SeqSBAIJ(Mat A)
39 {
40   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
41   int          i,j,*diag,m = a->mbs,ierr;
42 
43   PetscFunctionBegin;
44   if (a->diag) PetscFunctionReturn(0);
45 
46   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
47   PetscLogObjectMemory(A,(m+1)*sizeof(int));
48   for (i=0; i<m; i++) {
49     diag[i] = a->i[i+1];
50     for (j=a->i[i]; j<a->i[i+1]; j++) {
51       if (a->j[j] == i) {
52         diag[i] = j;
53         break;
54       }
55     }
56   }
57   a->diag = diag;
58   PetscFunctionReturn(0);
59 }
60 
61 
62 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
63 
64 #undef __FUNC__
65 #define __FUNC__ "MatGetRowIJ_SeqSBAIJ"
66 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
67 {
68   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
69 
70   PetscFunctionBegin;
71   if (ia) {
72     SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
73   }
74   *nn = a->mbs;
75   PetscFunctionReturn(0);
76 }
77 
78 #undef __FUNC__
79 #define __FUNC__ "MatRestoreRowIJ_SeqSBAIJ"
80 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
81 {
82   PetscFunctionBegin;
83   if (!ia) PetscFunctionReturn(0);
84   SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
85   /* PetscFunctionReturn(0); */
86 }
87 
88 #undef __FUNC__
89 #define __FUNC__ "MatGetBlockSize_SeqSBAIJ"
90 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
91 {
92   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
93 
94   PetscFunctionBegin;
95   *bs = sbaij->bs;
96   PetscFunctionReturn(0);
97 }
98 
99 #undef __FUNC__
100 #define __FUNC__ "MatDestroy_SeqSBAIJ"
101 int MatDestroy_SeqSBAIJ(Mat A)
102 {
103   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
104   int         ierr;
105 
106   PetscFunctionBegin;
107 #if defined(PETSC_USE_LOG)
108   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
109 #endif
110   ierr = PetscFree(a->a);CHKERRQ(ierr);
111   if (!a->singlemalloc) {
112     ierr = PetscFree(a->i);CHKERRQ(ierr);
113     ierr = PetscFree(a->j);CHKERRQ(ierr);
114   }
115   if (a->row) {
116     ierr = ISDestroy(a->row);CHKERRQ(ierr);
117   }
118   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
119   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
120   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
121   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
122   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
125 
126   if (a->inew){
127     ierr = PetscFree(a->inew);CHKERRQ(ierr);
128     a->inew = 0;
129   }
130   ierr = PetscFree(a);CHKERRQ(ierr);
131   PetscFunctionReturn(0);
132 }
133 
134 #undef __FUNC__
135 #define __FUNC__ "MatSetOption_SeqSBAIJ"
136 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
137 {
138   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
139 
140   PetscFunctionBegin;
141   if      (op == MAT_ROW_ORIENTED)                 a->roworiented    = PETSC_TRUE;
142   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented    = PETSC_FALSE;
143   else if (op == MAT_COLUMNS_SORTED)               a->sorted         = PETSC_TRUE;
144   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted         = PETSC_FALSE;
145   else if (op == MAT_KEEP_ZEROED_ROWS)             a->keepzeroedrows = PETSC_TRUE;
146   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew          = 1;
147   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew          = -1;
148   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew          = -2;
149   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew          = 0;
150   else if (op == MAT_ROWS_SORTED ||
151            op == MAT_ROWS_UNSORTED ||
152            op == MAT_SYMMETRIC ||
153            op == MAT_STRUCTURALLY_SYMMETRIC ||
154            op == MAT_YES_NEW_DIAGONALS ||
155            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
156            op == MAT_USE_HASH_TABLE) {
157     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
158   } else if (op == MAT_NO_NEW_DIAGONALS) {
159     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
160   } else {
161     SETERRQ(PETSC_ERR_SUP,"unknown option");
162   }
163   PetscFunctionReturn(0);
164 }
165 
166 #undef __FUNC__
167 #define __FUNC__ "MatGetOwnershipRange_SeqSBAIJ"
168 int MatGetOwnershipRange_SeqSBAIJ(Mat A,int *m,int *n)
169 {
170   PetscFunctionBegin;
171   *m = 0;
172   *n = A->m;
173   PetscFunctionReturn(0);
174 }
175 
176 #undef __FUNC__
177 #define __FUNC__ "MatGetRow_SeqSBAIJ"
178 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,Scalar **v)
179 {
180   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
181   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
182   MatScalar    *aa,*aa_i;
183   Scalar       *v_i;
184 
185   PetscFunctionBegin;
186   bs  = a->bs;
187   ai  = a->i;
188   aj  = a->j;
189   aa  = a->a;
190   bs2 = a->bs2;
191 
192   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
193 
194   bn  = row/bs;   /* Block number */
195   bp  = row % bs; /* Block position */
196   M   = ai[bn+1] - ai[bn];
197   *ncols = bs*M;
198 
199   if (v) {
200     *v = 0;
201     if (*ncols) {
202       ierr = PetscMalloc((*ncols+row)*sizeof(Scalar),v);CHKERRQ(ierr);
203       for (i=0; i<M; i++) { /* for each block in the block row */
204         v_i  = *v + i*bs;
205         aa_i = aa + bs2*(ai[bn] + i);
206         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
207       }
208     }
209   }
210 
211   if (cols) {
212     *cols = 0;
213     if (*ncols) {
214       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
215       for (i=0; i<M; i++) { /* for each block in the block row */
216         cols_i = *cols + i*bs;
217         itmp  = bs*aj[ai[bn] + i];
218         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
219       }
220     }
221   }
222 
223   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
224   /* this segment is currently removed, so only entries in the upper triangle are obtained */
225 #ifdef column_search
226   v_i    = *v    + M*bs;
227   cols_i = *cols + M*bs;
228   for (i=0; i<bn; i++){ /* for each block row */
229     M = ai[i+1] - ai[i];
230     for (j=0; j<M; j++){
231       itmp = aj[ai[i] + j];    /* block column value */
232       if (itmp == bn){
233         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
234         for (k=0; k<bs; k++) {
235           *cols_i++ = i*bs+k;
236           *v_i++    = aa_i[k];
237         }
238         *ncols += bs;
239         break;
240       }
241     }
242   }
243 #endif
244 
245   PetscFunctionReturn(0);
246 }
247 
248 #undef __FUNC__
249 #define __FUNC__ "MatRestoreRow_SeqSBAIJ"
250 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
251 {
252   int ierr;
253 
254   PetscFunctionBegin;
255   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
256   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
257   PetscFunctionReturn(0);
258 }
259 
260 #undef __FUNC__
261 #define __FUNC__ "MatTranspose_SeqSBAIJ"
262 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
263 {
264   PetscFunctionBegin;
265   SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called");
266   /* PetscFunctionReturn(0); */
267 }
268 
269 #undef __FUNC__
270 #define __FUNC__ "MatView_SeqSBAIJ_Binary"
271 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
272 {
273   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
274   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
275   Scalar      *aa;
276   FILE        *file;
277 
278   PetscFunctionBegin;
279   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
280   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
281   col_lens[0] = MAT_COOKIE;
282 
283   col_lens[1] = A->m;
284   col_lens[2] = A->m;
285   col_lens[3] = a->s_nz*bs2;
286 
287   /* store lengths of each row and write (including header) to file */
288   count = 0;
289   for (i=0; i<a->mbs; i++) {
290     for (j=0; j<bs; j++) {
291       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
292     }
293   }
294   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
295   ierr = PetscFree(col_lens);CHKERRQ(ierr);
296 
297   /* store column indices (zero start index) */
298   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
299   count = 0;
300   for (i=0; i<a->mbs; i++) {
301     for (j=0; j<bs; j++) {
302       for (k=a->i[i]; k<a->i[i+1]; k++) {
303         for (l=0; l<bs; l++) {
304           jj[count++] = bs*a->j[k] + l;
305         }
306       }
307     }
308   }
309   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
310   ierr = PetscFree(jj);CHKERRQ(ierr);
311 
312   /* store nonzero values */
313   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr);
314   count = 0;
315   for (i=0; i<a->mbs; i++) {
316     for (j=0; j<bs; j++) {
317       for (k=a->i[i]; k<a->i[i+1]; k++) {
318         for (l=0; l<bs; l++) {
319           aa[count++] = a->a[bs2*k + l*bs + j];
320         }
321       }
322     }
323   }
324   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
325   ierr = PetscFree(aa);CHKERRQ(ierr);
326 
327   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
328   if (file) {
329     fprintf(file,"-matload_block_size %d\n",a->bs);
330   }
331   PetscFunctionReturn(0);
332 }
333 
334 #undef __FUNC__
335 #define __FUNC__ "MatView_SeqSBAIJ_ASCII"
336 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
337 {
338   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
339   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
340   char              *name;
341   PetscViewerFormat format;
342 
343   PetscFunctionBegin;
344   ierr = PetscObjectGetName((PetscObject)viewer,&name);CHKERRQ(ierr);
345   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
346   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
347     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
348   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
349     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
350   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
351     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
352     for (i=0; i<a->mbs; i++) {
353       for (j=0; j<bs; j++) {
354         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
355         for (k=a->i[i]; k<a->i[i+1]; k++) {
356           for (l=0; l<bs; l++) {
357 #if defined(PETSC_USE_COMPLEX)
358             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
359               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
360                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
361             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
362               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
363                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
364             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
365               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
366             }
367 #else
368             if (a->a[bs2*k + l*bs + j] != 0.0) {
369               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
370             }
371 #endif
372           }
373         }
374         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
375       }
376     }
377     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
378   } else {
379     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
380     for (i=0; i<a->mbs; i++) {
381       for (j=0; j<bs; j++) {
382         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
383         for (k=a->i[i]; k<a->i[i+1]; k++) {
384           for (l=0; l<bs; l++) {
385 #if defined(PETSC_USE_COMPLEX)
386             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
387               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
388                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
389             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
390               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
391                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
392             } else {
393               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
394             }
395 #else
396             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
397 #endif
398           }
399         }
400         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
401       }
402     }
403     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
404   }
405   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNC__
410 #define __FUNC__ "MatView_SeqSBAIJ_Draw_Zoom"
411 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
412 {
413   Mat          A = (Mat) Aa;
414   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)A->data;
415   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
416   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
417   MatScalar    *aa;
418   MPI_Comm     comm;
419   PetscViewer       viewer;
420 
421   PetscFunctionBegin;
422   /*
423       This is nasty. If this is called from an originally parallel matrix
424    then all processes call this,but only the first has the matrix so the
425    rest should return immediately.
426   */
427   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
428   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
429   if (rank) PetscFunctionReturn(0);
430 
431   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
432 
433   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
434   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
435 
436   /* loop over matrix elements drawing boxes */
437   color = PETSC_DRAW_BLUE;
438   for (i=0,row=0; i<mbs; i++,row+=bs) {
439     for (j=a->i[i]; j<a->i[i+1]; j++) {
440       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
441       x_l = a->j[j]*bs; x_r = x_l + 1.0;
442       aa = a->a + j*bs2;
443       for (k=0; k<bs; k++) {
444         for (l=0; l<bs; l++) {
445           if (PetscRealPart(*aa++) >=  0.) continue;
446           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
447         }
448       }
449     }
450   }
451   color = PETSC_DRAW_CYAN;
452   for (i=0,row=0; i<mbs; i++,row+=bs) {
453     for (j=a->i[i]; j<a->i[i+1]; j++) {
454       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
455       x_l = a->j[j]*bs; x_r = x_l + 1.0;
456       aa = a->a + j*bs2;
457       for (k=0; k<bs; k++) {
458         for (l=0; l<bs; l++) {
459           if (PetscRealPart(*aa++) != 0.) continue;
460           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
461         }
462       }
463     }
464   }
465 
466   color = PETSC_DRAW_RED;
467   for (i=0,row=0; i<mbs; i++,row+=bs) {
468     for (j=a->i[i]; j<a->i[i+1]; j++) {
469       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
470       x_l = a->j[j]*bs; x_r = x_l + 1.0;
471       aa = a->a + j*bs2;
472       for (k=0; k<bs; k++) {
473         for (l=0; l<bs; l++) {
474           if (PetscRealPart(*aa++) <= 0.) continue;
475           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
476         }
477       }
478     }
479   }
480   PetscFunctionReturn(0);
481 }
482 
483 #undef __FUNC__
484 #define __FUNC__ "MatView_SeqSBAIJ_Draw"
485 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
486 {
487   int          ierr;
488   PetscReal    xl,yl,xr,yr,w,h;
489   PetscDraw         draw;
490   PetscTruth   isnull;
491 
492   PetscFunctionBegin;
493 
494   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
495   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
496 
497   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
498   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
499   xr += w;    yr += h;  xl = -w;     yl = -h;
500   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
501   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
502   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
503   PetscFunctionReturn(0);
504 }
505 
506 #undef __FUNC__
507 #define __FUNC__ "MatView_SeqSBAIJ"
508 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
509 {
510   int        ierr;
511   PetscTruth issocket,isascii,isbinary,isdraw;
512 
513   PetscFunctionBegin;
514   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
515   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
516   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
517   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
518   if (issocket) {
519     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
520   } else if (isascii){
521     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
522   } else if (isbinary) {
523     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
524   } else if (isdraw) {
525     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
526   } else {
527     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
528   }
529   PetscFunctionReturn(0);
530 }
531 
532 
533 #undef __FUNC__
534 #define __FUNC__ "MatGetValues_SeqSBAIJ"
535 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
536 {
537   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
538   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
539   int        *ai = a->i,*ailen = a->ilen;
540   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
541   MatScalar  *ap,*aa = a->a,zero = 0.0;
542 
543   PetscFunctionBegin;
544   for (k=0; k<m; k++) { /* loop over rows */
545     row  = im[k]; brow = row/bs;
546     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
547     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
548     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
549     nrow = ailen[brow];
550     for (l=0; l<n; l++) { /* loop over columns */
551       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
552       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
553       col  = in[l] ;
554       bcol = col/bs;
555       cidx = col%bs;
556       ridx = row%bs;
557       high = nrow;
558       low  = 0; /* assume unsorted */
559       while (high-low > 5) {
560         t = (low+high)/2;
561         if (rp[t] > bcol) high = t;
562         else             low  = t;
563       }
564       for (i=low; i<high; i++) {
565         if (rp[i] > bcol) break;
566         if (rp[i] == bcol) {
567           *v++ = ap[bs2*i+bs*cidx+ridx];
568           goto finished;
569         }
570       }
571       *v++ = zero;
572       finished:;
573     }
574   }
575   PetscFunctionReturn(0);
576 }
577 
578 
579 #undef __FUNC__
580 #define __FUNC__ "MatSetValuesBlocked_SeqSBAIJ"
581 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
582 {
583   PetscFunctionBegin;
584   SETERRQ(1,"Function not yet written for SBAIJ format");
585   /* PetscFunctionReturn(0); */
586 }
587 
588 #undef __FUNC__
589 #define __FUNC__ "MatAssemblyEnd_SeqSBAIJ"
590 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
591 {
592   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
593   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
594   int        m = A->m,*ip,N,*ailen = a->ilen;
595   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
596   MatScalar  *aa = a->a,*ap;
597 
598   PetscFunctionBegin;
599   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
600 
601   if (m) rmax = ailen[0];
602   for (i=1; i<mbs; i++) {
603     /* move each row back by the amount of empty slots (fshift) before it*/
604     fshift += imax[i-1] - ailen[i-1];
605     rmax   = PetscMax(rmax,ailen[i]);
606     if (fshift) {
607       ip = aj + ai[i]; ap = aa + bs2*ai[i];
608       N = ailen[i];
609       for (j=0; j<N; j++) {
610         ip[j-fshift] = ip[j];
611         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
612       }
613     }
614     ai[i] = ai[i-1] + ailen[i-1];
615   }
616   if (mbs) {
617     fshift += imax[mbs-1] - ailen[mbs-1];
618     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
619   }
620   /* reset ilen and imax for each row */
621   for (i=0; i<mbs; i++) {
622     ailen[i] = imax[i] = ai[i+1] - ai[i];
623   }
624   a->s_nz = ai[mbs];
625 
626   /* diagonals may have moved, so kill the diagonal pointers */
627   if (fshift && a->diag) {
628     ierr = PetscFree(a->diag);CHKERRQ(ierr);
629     PetscLogObjectMemory(A,-(m+1)*sizeof(int));
630     a->diag = 0;
631   }
632   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
633            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
634   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
635            a->reallocs);
636   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
637   a->reallocs          = 0;
638   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
639 
640   PetscFunctionReturn(0);
641 }
642 
643 /*
644    This function returns an array of flags which indicate the locations of contiguous
645    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
646    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
647    Assume: sizes should be long enough to hold all the values.
648 */
649 #undef __FUNC__
650 #define __FUNC__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
651 static int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
652 {
653   int        i,j,k,row;
654   PetscTruth flg;
655 
656   PetscFunctionBegin;
657   for (i=0,j=0; i<n; j++) {
658     row = idx[i];
659     if (row%bs!=0) { /* Not the begining of a block */
660       sizes[j] = 1;
661       i++;
662     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
663       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
664       i++;
665     } else { /* Begining of the block, so check if the complete block exists */
666       flg = PETSC_TRUE;
667       for (k=1; k<bs; k++) {
668         if (row+k != idx[i+k]) { /* break in the block */
669           flg = PETSC_FALSE;
670           break;
671         }
672       }
673       if (flg == PETSC_TRUE) { /* No break in the bs */
674         sizes[j] = bs;
675         i+= bs;
676       } else {
677         sizes[j] = 1;
678         i++;
679       }
680     }
681   }
682   *bs_max = j;
683   PetscFunctionReturn(0);
684 }
685 
686 #undef __FUNC__
687 #define __FUNC__ "MatZeroRows_SeqSBAIJ"
688 int MatZeroRows_SeqSBAIJ(Mat A,IS is,Scalar *diag)
689 {
690   Mat_SeqSBAIJ *sbaij=(Mat_SeqSBAIJ*)A->data;
691   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
692   int         bs=sbaij->bs,bs2=sbaij->bs2,*sizes,row,bs_max;
693   Scalar      zero = 0.0;
694   MatScalar   *aa;
695 
696   PetscFunctionBegin;
697   /* Make a copy of the IS and  sort it */
698   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
699   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
700 
701   /* allocate memory for rows,sizes */
702   ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
703   sizes = rows + is_n;
704 
705   /* initialize copy IS values to rows, and sort them */
706   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
707   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
708   if (sbaij->keepzeroedrows) { /* do not change nonzero structure */
709     for (i=0; i<is_n; i++) { sizes[i] = 1; } /* sizes: size of blocks, = 1 or bs */
710     bs_max = is_n;            /* bs_max: num. of contiguous block row in the row */
711   } else {
712     ierr = MatZeroRows_SeqSBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
713   }
714   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
715 
716   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
717     row   = rows[j];                  /* row to be zeroed */
718     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
719     count = (sbaij->i[row/bs +1] - sbaij->i[row/bs])*bs; /* num. of elements in the row */
720     aa    = sbaij->a + sbaij->i[row/bs]*bs2 + (row%bs);
721     if (sizes[i] == bs && !sbaij->keepzeroedrows) {
722       if (diag) {
723         if (sbaij->ilen[row/bs] > 0) {
724           sbaij->ilen[row/bs] = 1;
725           sbaij->j[sbaij->i[row/bs]] = row/bs;
726           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
727         }
728         /* Now insert all the diagoanl values for this bs */
729         for (k=0; k<bs; k++) {
730           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
731         }
732       } else { /* (!diag) */
733         sbaij->ilen[row/bs] = 0;
734       } /* end (!diag) */
735     } else { /* (sizes[i] != bs), broken block */
736 #if defined (PETSC_USE_DEBUG)
737       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
738 #endif
739       for (k=0; k<count; k++) {
740         aa[0] = zero;
741         aa+=bs;
742       }
743       if (diag) {
744         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
745       }
746     }
747   }
748 
749   ierr = PetscFree(rows);CHKERRQ(ierr);
750   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
752   PetscFunctionReturn(0);
753 }
754 
755 /* Only add/insert a(i,j) with i<=j (blocks).
756    Any a(i,j) with i>j input by user is ingored.
757 */
758 
759 #undef __FUNC__
760 #define __FUNC__ "MatSetValues_SeqSBAIJ"
761 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
762 {
763   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
764   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
765   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
766   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
767   int         ridx,cidx,bs2=a->bs2,ierr;
768   MatScalar   *ap,value,*aa=a->a,*bap;
769 
770   PetscFunctionBegin;
771 
772   for (k=0; k<m; k++) { /* loop over added rows */
773     row  = im[k];       /* row number */
774     brow = row/bs;      /* block row number */
775     if (row < 0) continue;
776 #if defined(PETSC_USE_BOPT_g)
777     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
778 #endif
779     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
780     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
781     rmax = imax[brow];  /* maximum space allocated for this row */
782     nrow = ailen[brow]; /* actual length of this row */
783     low  = 0;
784 
785     for (l=0; l<n; l++) { /* loop over added columns */
786       if (in[l] < 0) continue;
787 #if defined(PETSC_USE_BOPT_g)
788       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m);
789 #endif
790       col = in[l];
791       bcol = col/bs;              /* block col number */
792 
793       if (brow <= bcol){
794         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
795         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
796           /* element value a(k,l) */
797           if (roworiented) {
798             value = v[l + k*n];
799           } else {
800             value = v[k + l*m];
801           }
802 
803           /* move pointer bap to a(k,l) quickly and add/insert value */
804           if (!sorted) low = 0; high = nrow;
805           while (high-low > 7) {
806             t = (low+high)/2;
807             if (rp[t] > bcol) high = t;
808             else              low  = t;
809           }
810           for (i=low; i<high; i++) {
811             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
812             if (rp[i] > bcol) break;
813             if (rp[i] == bcol) {
814               bap  = ap +  bs2*i + bs*cidx + ridx;
815               if (is == ADD_VALUES) *bap += value;
816               else                  *bap  = value;
817               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
818               if (brow == bcol && ridx < cidx){
819                 bap  = ap +  bs2*i + bs*ridx + cidx;
820                 if (is == ADD_VALUES) *bap += value;
821                 else                  *bap  = value;
822               }
823               goto noinsert1;
824             }
825           }
826 
827       if (nonew == 1) goto noinsert1;
828       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
829       if (nrow >= rmax) {
830         /* there is no extra room in row, therefore enlarge */
831         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
832         MatScalar *new_a;
833 
834         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
835 
836         /* Malloc new storage space */
837         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
838         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
839         new_j = (int*)(new_a + bs2*new_nz);
840         new_i = new_j + new_nz;
841 
842         /* copy over old data into new slots */
843         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
844         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
845         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
846         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
847         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
848         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
849         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
850         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
851         /* free up old matrix storage */
852         ierr = PetscFree(a->a);CHKERRQ(ierr);
853         if (!a->singlemalloc) {
854           ierr = PetscFree(a->i);CHKERRQ(ierr);
855           ierr = PetscFree(a->j);CHKERRQ(ierr);
856         }
857         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
858         a->singlemalloc = PETSC_TRUE;
859 
860         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
861         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
862         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
863         a->s_maxnz += bs2*CHUNKSIZE;
864         a->reallocs++;
865         a->s_nz++;
866       }
867 
868       N = nrow++ - 1;
869       /* shift up all the later entries in this row */
870       for (ii=N; ii>=i; ii--) {
871         rp[ii+1] = rp[ii];
872         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
873       }
874       if (N>=i) {
875         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
876       }
877       rp[i]                      = bcol;
878       ap[bs2*i + bs*cidx + ridx] = value;
879       noinsert1:;
880       low = i;
881       /* } */
882         }
883       } /* end of if .. if..  */
884     }                     /* end of loop over added columns */
885     ailen[brow] = nrow;
886   }                       /* end of loop over added rows */
887 
888   PetscFunctionReturn(0);
889 }
890 
891 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*);
892 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal);
893 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int);
894 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
895 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
896 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
897 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
898 extern int MatScale_SeqSBAIJ(Scalar*,Mat);
899 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
900 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
901 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
902 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
903 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
904 extern int MatZeroEntries_SeqSBAIJ(Mat);
905 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
906 
907 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
908 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
909 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
910 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
911 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
912 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
913 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
914 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
915 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
916 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
917 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
918 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
919 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
920 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
921 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
922 
923 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
924 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
925 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
926 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
927 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
928 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
929 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
930 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
931 
932 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
933 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
934 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
935 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
936 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
937 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
938 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
939 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
940 
941 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
942 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
943 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
944 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
945 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
946 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
947 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
948 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
949 
950 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
951 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
952 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
953 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
954 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
955 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
956 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
957 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
958 
959 #ifdef HAVE_MatIncompleteCholeskyFactor
960 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
961 #undef __FUNC__
962 #define __FUNC__ "MatIncompleteCholeskyFactor_SeqSBAIJ"
963 int MatIncompleteCholeskyFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level)
964 {
965   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
966   Mat         outA;
967   int         ierr;
968   PetscTruth  row_identity,col_identity;
969 
970   PetscFunctionBegin;
971   /*
972   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
973   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
974   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
975   if (!row_identity || !col_identity) {
976     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
977   }
978   */
979 
980   outA          = inA;
981   inA->factor   = FACTOR_CHOLESKY;
982 
983   if (!a->diag) {
984     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
985   }
986   /*
987       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
988       for ILU(0) factorization with natural ordering
989   */
990   switch (a->bs) {
991   case 1:
992     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
993     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
994     PetscLoginfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
995   case 2:
996     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
997     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
998     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
999     PetscLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1000     break;
1001   case 3:
1002     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1003     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1004     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1005     PetscLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1006     break;
1007   case 4:
1008     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1009     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1010     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1011     PetscLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1012     break;
1013   case 5:
1014     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1015     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1016     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1017     PetscLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1018     break;
1019   case 6:
1020     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1021     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1022     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1023     PetscLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1024     break;
1025   case 7:
1026     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1027     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1028     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1029     PetscLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1030     break;
1031   default:
1032     a->row        = row;
1033     a->icol       = col;
1034     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1035     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1036 
1037     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1038     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1039     PetscLogObjectParent(inA,a->icol);
1040 
1041     if (!a->solve_work) {
1042       ierr = PetscMalloc((A->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr);
1043       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(Scalar));
1044     }
1045   }
1046 
1047   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1048 
1049   PetscFunctionReturn(0);
1050 }
1051 #endif
1052 
1053 #undef __FUNC__
1054 #define __FUNC__ "MatPrintHelp_SeqSBAIJ"
1055 int MatPrintHelp_SeqSBAIJ(Mat A)
1056 {
1057   static PetscTruth called = PETSC_FALSE;
1058   MPI_Comm          comm = A->comm;
1059   int               ierr;
1060 
1061   PetscFunctionBegin;
1062   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1063   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1064   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 EXTERN_C_BEGIN
1069 #undef __FUNC__
1070 #define __FUNC__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1071 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1072 {
1073   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1074   int         i,nz,n;
1075 
1076   PetscFunctionBegin;
1077   nz = baij->s_maxnz;
1078   n  = mat->n;
1079   for (i=0; i<nz; i++) {
1080     baij->j[i] = indices[i];
1081   }
1082   baij->s_nz = nz;
1083   for (i=0; i<n; i++) {
1084     baij->ilen[i] = baij->imax[i];
1085   }
1086 
1087   PetscFunctionReturn(0);
1088 }
1089 EXTERN_C_END
1090 
1091 #undef __FUNC__
1092 #define __FUNC__ "MatSeqSBAIJSetColumnIndices"
1093 /*@
1094     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1095        in the matrix.
1096 
1097   Input Parameters:
1098 +  mat     - the SeqSBAIJ matrix
1099 -  indices - the column indices
1100 
1101   Level: advanced
1102 
1103   Notes:
1104     This can be called if you have precomputed the nonzero structure of the
1105   matrix and want to provide it to the matrix object to improve the performance
1106   of the MatSetValues() operation.
1107 
1108     You MUST have set the correct numbers of nonzeros per row in the call to
1109   MatCreateSeqSBAIJ().
1110 
1111     MUST be called before any calls to MatSetValues()
1112 
1113 .seealso: MatCreateSeqSBAIJ
1114 @*/
1115 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1116 {
1117   int ierr,(*f)(Mat,int *);
1118 
1119   PetscFunctionBegin;
1120   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1121   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr);
1122   if (f) {
1123     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1124   } else {
1125     SETERRQ(1,"Wrong type of matrix to set column indices");
1126   }
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNC__
1131 #define __FUNC__ "MatSetUpPreallocation_SeqSBAIJ"
1132 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1133 {
1134   int        ierr;
1135 
1136   PetscFunctionBegin;
1137   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 /* -------------------------------------------------------------------*/
1142 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1143        MatGetRow_SeqSBAIJ,
1144        MatRestoreRow_SeqSBAIJ,
1145        MatMult_SeqSBAIJ_N,
1146        MatMultAdd_SeqSBAIJ_N,
1147        MatMultTranspose_SeqSBAIJ,
1148        MatMultTransposeAdd_SeqSBAIJ,
1149        MatSolve_SeqSBAIJ_N,
1150        0,
1151        0,
1152        0,
1153        0,
1154        MatCholeskyFactor_SeqSBAIJ,
1155        0,
1156        MatTranspose_SeqSBAIJ,
1157        MatGetInfo_SeqSBAIJ,
1158        MatEqual_SeqSBAIJ,
1159        MatGetDiagonal_SeqSBAIJ,
1160        MatDiagonalScale_SeqSBAIJ,
1161        MatNorm_SeqSBAIJ,
1162        0,
1163        MatAssemblyEnd_SeqSBAIJ,
1164        0,
1165        MatSetOption_SeqSBAIJ,
1166        MatZeroEntries_SeqSBAIJ,
1167        MatZeroRows_SeqSBAIJ,
1168        0,
1169        0,
1170        MatCholeskyFactorSymbolic_SeqSBAIJ,
1171        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1172        MatSetUpPreallocation_SeqSBAIJ,
1173        0,
1174        MatGetOwnershipRange_SeqSBAIJ,
1175        0,
1176        MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ,
1177        0,
1178        0,
1179        MatDuplicate_SeqSBAIJ,
1180        0,
1181        0,
1182        0,
1183        0,
1184        0,
1185        MatGetSubMatrices_SeqSBAIJ,
1186        MatIncreaseOverlap_SeqSBAIJ,
1187        MatGetValues_SeqSBAIJ,
1188        0,
1189        MatPrintHelp_SeqSBAIJ,
1190        MatScale_SeqSBAIJ,
1191        0,
1192        0,
1193        0,
1194        MatGetBlockSize_SeqSBAIJ,
1195        MatGetRowIJ_SeqSBAIJ,
1196        MatRestoreRowIJ_SeqSBAIJ,
1197        0,
1198        0,
1199        0,
1200        0,
1201        0,
1202        0,
1203        MatSetValuesBlocked_SeqSBAIJ,
1204        MatGetSubMatrix_SeqSBAIJ,
1205        0,
1206        0,
1207        MatGetMaps_Petsc,
1208        0,
1209        0,
1210        0,
1211        0,
1212        0,
1213        0,
1214        MatGetRowMax_SeqSBAIJ};
1215 
1216 EXTERN_C_BEGIN
1217 #undef __FUNC__
1218 #define __FUNC__ "MatStoreValues_SeqSBAIJ"
1219 int MatStoreValues_SeqSBAIJ(Mat mat)
1220 {
1221   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1222   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1223   int         ierr;
1224 
1225   PetscFunctionBegin;
1226   if (aij->nonew != 1) {
1227     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1228   }
1229 
1230   /* allocate space for values if not already there */
1231   if (!aij->saved_values) {
1232     ierr = PetscMalloc(nz*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr);
1233   }
1234 
1235   /* copy values over */
1236   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 EXTERN_C_END
1240 
1241 EXTERN_C_BEGIN
1242 #undef __FUNC__
1243 #define __FUNC__ "MatRetrieveValues_SeqSBAIJ"
1244 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1245 {
1246   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1247   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1248 
1249   PetscFunctionBegin;
1250   if (aij->nonew != 1) {
1251     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1252   }
1253   if (!aij->saved_values) {
1254     SETERRQ(1,"Must call MatStoreValues(A);first");
1255   }
1256 
1257   /* copy values over */
1258   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 EXTERN_C_END
1262 
1263 EXTERN_C_BEGIN
1264 #undef __FUNC__
1265 #define __FUNC__ "MatCreate_SeqSBAIJ"
1266 int MatCreate_SeqSBAIJ(Mat B)
1267 {
1268   Mat_SeqSBAIJ *b;
1269   int          ierr,size;
1270 
1271   PetscFunctionBegin;
1272   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1273   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1274   B->m = B->M = PetscMax(B->m,B->M);
1275   B->n = B->N = PetscMax(B->n,B->N);
1276 
1277   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1278   B->data = (void*)b;
1279   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1280   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1281   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1282   B->ops->view        = MatView_SeqSBAIJ;
1283   B->factor           = 0;
1284   B->lupivotthreshold = 1.0;
1285   B->mapping          = 0;
1286   b->row              = 0;
1287   b->icol             = 0;
1288   b->reallocs         = 0;
1289   b->saved_values     = 0;
1290 
1291   ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1292   ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1293 
1294   b->sorted           = PETSC_FALSE;
1295   b->roworiented      = PETSC_TRUE;
1296   b->nonew            = 0;
1297   b->diag             = 0;
1298   b->solve_work       = 0;
1299   b->mult_work        = 0;
1300   b->spptr            = 0;
1301   b->keepzeroedrows   = PETSC_FALSE;
1302 
1303   b->inew             = 0;
1304   b->jnew             = 0;
1305   b->anew             = 0;
1306   b->a2anew           = 0;
1307   b->permute          = PETSC_FALSE;
1308 
1309   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1310                                      "MatStoreValues_SeqSBAIJ",
1311                                      (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1312   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1313                                      "MatRetrieveValues_SeqSBAIJ",
1314                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1315   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1316                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1317                                      (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 EXTERN_C_END
1321 
1322 #undef __FUNC__
1323 #define __FUNC__ "MatSeqSBAIJSetPreallocation"
1324 /*@C
1325    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1326    compressed row) format.  For good matrix assembly performance the
1327    user should preallocate the matrix storage by setting the parameter nz
1328    (or the array nnz).  By setting these parameters accurately, performance
1329    during matrix assembly can be increased by more than a factor of 50.
1330 
1331    Collective on Mat
1332 
1333    Input Parameters:
1334 +  A - the symmetric matrix
1335 .  bs - size of block
1336 .  nz - number of block nonzeros per block row (same for all rows)
1337 -  nnz - array containing the number of block nonzeros in the various block rows
1338          (possibly different for each block row) or PETSC_NULL
1339 
1340    Options Database Keys:
1341 .   -mat_no_unroll - uses code that does not unroll the loops in the
1342                      block calculations (much slower)
1343 .    -mat_block_size - size of the blocks to use
1344 
1345    Level: intermediate
1346 
1347    Notes:
1348    The block AIJ format is compatible with standard Fortran 77
1349    storage.  That is, the stored row and column indices can begin at
1350    either one (as in Fortran) or zero.  See the users' manual for details.
1351 
1352    Specify the preallocated storage with either nz or nnz (not both).
1353    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1354    allocation.  For additional details, see the users manual chapter on
1355    matrices.
1356 
1357 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1358 @*/
1359 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1360 {
1361   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1362   int          i,len,ierr,mbs,bs2;
1363   PetscTruth   flg;
1364   int          s_nz;
1365 
1366   PetscFunctionBegin;
1367   B->preallocated = PETSC_TRUE;
1368   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1369   mbs  = B->m/bs;
1370   bs2  = bs*bs;
1371 
1372   if (mbs*bs != B->m) {
1373     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1374   }
1375 
1376   if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than -2: value %d",nz);
1377   if (nnz) {
1378     for (i=0; i<mbs; i++) {
1379       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1380     }
1381   }
1382 
1383   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1384   if (!flg) {
1385     switch (bs) {
1386     case 1:
1387       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1388       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1389       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1390       B->ops->mult            = MatMult_SeqSBAIJ_1;
1391       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1392       break;
1393     case 2:
1394       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1395       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1396       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1397       B->ops->mult            = MatMult_SeqSBAIJ_2;
1398       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1399       break;
1400     case 3:
1401       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1402       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1403       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1404       B->ops->mult            = MatMult_SeqSBAIJ_3;
1405       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1406       break;
1407     case 4:
1408       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1409       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1410       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1411       B->ops->mult            = MatMult_SeqSBAIJ_4;
1412       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1413       break;
1414     case 5:
1415       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1416       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1417       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1418       B->ops->mult            = MatMult_SeqSBAIJ_5;
1419       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1420       break;
1421     case 6:
1422       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1423       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1424       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1425       B->ops->mult            = MatMult_SeqSBAIJ_6;
1426       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1427       break;
1428     case 7:
1429       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1430       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1431       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1432       B->ops->mult            = MatMult_SeqSBAIJ_7;
1433       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1434       break;
1435     }
1436   }
1437 
1438   b->mbs = mbs;
1439   b->nbs = mbs;
1440   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1441   if (!nnz) {
1442     if (nz == PETSC_DEFAULT) nz = 5;
1443     else if (nz <= 0)        nz = 1;
1444     for (i=0; i<mbs; i++) {
1445       b->imax[i] = nz;
1446     }
1447     nz = nz*mbs; /* total nz */
1448   } else {
1449     nz = 0;
1450     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1451   }
1452   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1453   s_nz = nz;
1454 
1455   /* allocate the matrix space */
1456   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1457   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1458   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1459   b->j = (int*)(b->a + s_nz*bs2);
1460   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1461   b->i = b->j + s_nz;
1462   b->singlemalloc = PETSC_TRUE;
1463 
1464   /* pointer to beginning of each row */
1465   b->i[0] = 0;
1466   for (i=1; i<mbs+1; i++) {
1467     b->i[i] = b->i[i-1] + b->imax[i-1];
1468   }
1469 
1470   /* b->ilen will count nonzeros in each block row so far. */
1471   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);
1472   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1473   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1474 
1475   b->bs               = bs;
1476   b->bs2              = bs2;
1477   b->s_nz             = 0;
1478   b->s_maxnz          = s_nz*bs2;
1479 
1480   b->inew             = 0;
1481   b->jnew             = 0;
1482   b->anew             = 0;
1483   b->a2anew           = 0;
1484   b->permute          = PETSC_FALSE;
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 
1489 #undef __FUNC__
1490 #define __FUNC__ "MatCreateSeqSBAIJ"
1491 /*@C
1492    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1493    compressed row) format.  For good matrix assembly performance the
1494    user should preallocate the matrix storage by setting the parameter nz
1495    (or the array nnz).  By setting these parameters accurately, performance
1496    during matrix assembly can be increased by more than a factor of 50.
1497 
1498    Collective on MPI_Comm
1499 
1500    Input Parameters:
1501 +  comm - MPI communicator, set to PETSC_COMM_SELF
1502 .  bs - size of block
1503 .  m - number of rows, or number of columns
1504 .  nz - number of block nonzeros per block row (same for all rows)
1505 -  nnz - array containing the number of block nonzeros in the various block rows
1506          (possibly different for each block row) or PETSC_NULL
1507 
1508    Output Parameter:
1509 .  A - the symmetric matrix
1510 
1511    Options Database Keys:
1512 .   -mat_no_unroll - uses code that does not unroll the loops in the
1513                      block calculations (much slower)
1514 .    -mat_block_size - size of the blocks to use
1515 
1516    Level: intermediate
1517 
1518    Notes:
1519    The block AIJ format is compatible with standard Fortran 77
1520    storage.  That is, the stored row and column indices can begin at
1521    either one (as in Fortran) or zero.  See the users' manual for details.
1522 
1523    Specify the preallocated storage with either nz or nnz (not both).
1524    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1525    allocation.  For additional details, see the users manual chapter on
1526    matrices.
1527 
1528 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1529 @*/
1530 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1531 {
1532   int ierr;
1533 
1534   PetscFunctionBegin;
1535   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1536   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1537   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 #undef __FUNC__
1542 #define __FUNC__ "MatDuplicate_SeqSBAIJ"
1543 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1544 {
1545   Mat         C;
1546   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1547   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1548 
1549   PetscFunctionBegin;
1550   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1551 
1552   *B = 0;
1553   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1554   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1555   c    = (Mat_SeqSBAIJ*)C->data;
1556 
1557   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1558   C->preallocated   = PETSC_TRUE;
1559   C->factor         = A->factor;
1560   c->row            = 0;
1561   c->icol           = 0;
1562   c->saved_values   = 0;
1563   c->keepzeroedrows = a->keepzeroedrows;
1564   C->assembled      = PETSC_TRUE;
1565 
1566   c->bs         = a->bs;
1567   c->bs2        = a->bs2;
1568   c->mbs        = a->mbs;
1569   c->nbs        = a->nbs;
1570 
1571   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1572   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1573   for (i=0; i<mbs; i++) {
1574     c->imax[i] = a->imax[i];
1575     c->ilen[i] = a->ilen[i];
1576   }
1577 
1578   /* allocate the matrix space */
1579   c->singlemalloc = PETSC_TRUE;
1580   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1581   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1582   c->j = (int*)(c->a + nz*bs2);
1583   c->i = c->j + nz;
1584   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1585   if (mbs > 0) {
1586     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1587     if (cpvalues == MAT_COPY_VALUES) {
1588       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1589     } else {
1590       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1591     }
1592   }
1593 
1594   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1595   c->sorted      = a->sorted;
1596   c->roworiented = a->roworiented;
1597   c->nonew       = a->nonew;
1598 
1599   if (a->diag) {
1600     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1601     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1602     for (i=0; i<mbs; i++) {
1603       c->diag[i] = a->diag[i];
1604     }
1605   } else c->diag        = 0;
1606   c->s_nz               = a->s_nz;
1607   c->s_maxnz            = a->s_maxnz;
1608   c->solve_work         = 0;
1609   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1610   c->mult_work          = 0;
1611   *B = C;
1612   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1613   PetscFunctionReturn(0);
1614 }
1615 
1616 EXTERN_C_BEGIN
1617 #undef __FUNC__
1618 #define __FUNC__ "MatLoad_SeqSBAIJ"
1619 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1620 {
1621   Mat_SeqSBAIJ  *a;
1622   Mat          B;
1623   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1624   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,*s_browlengths,maskcount;
1625   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1626   int          *masked,nmask,tmp,bs2,ishift;
1627   Scalar       *aa;
1628   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1629 
1630   PetscFunctionBegin;
1631   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1632   bs2  = bs*bs;
1633 
1634   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1635   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1636   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1637   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1638   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1639   M = header[1]; N = header[2]; nz = header[3];
1640 
1641   if (header[3] < 0) {
1642     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1643   }
1644 
1645   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1646 
1647   /*
1648      This code adds extra rows to make sure the number of rows is
1649     divisible by the blocksize
1650   */
1651   mbs        = M/bs;
1652   extra_rows = bs - M + bs*(mbs);
1653   if (extra_rows == bs) extra_rows = 0;
1654   else                  mbs++;
1655   if (extra_rows) {
1656     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1657   }
1658 
1659   /* read in row lengths */
1660   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1661   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1662   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1663 
1664   /* read in column indices */
1665   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1666   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1667   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1668 
1669   /* loop over row lengths determining block row lengths */
1670   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1671   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1672   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1673   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1674   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1675   masked   = mask + mbs;
1676   rowcount = 0; nzcount = 0;
1677   for (i=0; i<mbs; i++) {
1678     nmask = 0;
1679     for (j=0; j<bs; j++) {
1680       kmax = rowlengths[rowcount];
1681       for (k=0; k<kmax; k++) {
1682         tmp = jj[nzcount++]/bs;   /* block col. index */
1683         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1684       }
1685       rowcount++;
1686     }
1687     s_browlengths[i] += nmask;
1688     browlengths[i]    = 2*s_browlengths[i];
1689 
1690     /* zero out the mask elements we set */
1691     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1692   }
1693 
1694   /* create our matrix */
1695   ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1696   B = *A;
1697   a = (Mat_SeqSBAIJ*)B->data;
1698 
1699   /* set matrix "i" values */
1700   a->i[0] = 0;
1701   for (i=1; i<= mbs; i++) {
1702     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1703     a->ilen[i-1] = s_browlengths[i-1];
1704   }
1705   a->s_nz         = 0;
1706   for (i=0; i<mbs; i++) a->s_nz += s_browlengths[i];
1707 
1708   /* read in nonzero values */
1709   ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr);
1710   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1711   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1712 
1713   /* set "a" and "j" values into matrix */
1714   nzcount = 0; jcount = 0;
1715   for (i=0; i<mbs; i++) {
1716     nzcountb = nzcount;
1717     nmask    = 0;
1718     for (j=0; j<bs; j++) {
1719       kmax = rowlengths[i*bs+j];
1720       for (k=0; k<kmax; k++) {
1721         tmp = jj[nzcount++]/bs; /* block col. index */
1722         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1723       }
1724       rowcount++;
1725     }
1726     /* sort the masked values */
1727     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1728 
1729     /* set "j" values into matrix */
1730     maskcount = 1;
1731     for (j=0; j<nmask; j++) {
1732       a->j[jcount++]  = masked[j];
1733       mask[masked[j]] = maskcount++;
1734     }
1735 
1736     /* set "a" values into matrix */
1737     ishift = bs2*a->i[i];
1738     for (j=0; j<bs; j++) {
1739       kmax = rowlengths[i*bs+j];
1740       for (k=0; k<kmax; k++) {
1741         tmp       = jj[nzcountb]/bs ; /* block col. index */
1742         if (tmp >= i){
1743           block     = mask[tmp] - 1;
1744           point     = jj[nzcountb] - bs*tmp;
1745           idx       = ishift + bs2*block + j + bs*point;
1746           a->a[idx] = aa[nzcountb];
1747         }
1748         nzcountb++;
1749       }
1750     }
1751     /* zero out the mask elements we set */
1752     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1753   }
1754   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1755 
1756   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1757   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1758   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1759   ierr = PetscFree(aa);CHKERRQ(ierr);
1760   ierr = PetscFree(jj);CHKERRQ(ierr);
1761   ierr = PetscFree(mask);CHKERRQ(ierr);
1762 
1763   B->assembled = PETSC_TRUE;
1764   ierr = MatView_Private(B);CHKERRQ(ierr);
1765   PetscFunctionReturn(0);
1766 }
1767 EXTERN_C_END
1768 
1769 
1770 
1771