xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision b5758dff40dc4232f7fa9713b5481f7e0e0d5d24)
1 /*$Id: sbaij.c,v 1.47 2001/01/19 23:20:46 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "petscsys.h"                            /*I "petscmat.h" I*/
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 #include "src/mat/impls/sbaij/seq/sbaij.h"
12 
13 #define CHUNKSIZE  10
14 
15 /*
16      Checks for missing diagonals
17 */
18 #undef __FUNC__
19 #define __FUNC__ "MatMissingDiagonal_SeqSBAIJ"
20 int MatMissingDiagonal_SeqSBAIJ(Mat A)
21 {
22   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
23   int         *diag,*jj = a->j,i,ierr;
24 
25   PetscFunctionBegin;
26   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
27   diag = a->diag;
28   for (i=0; i<a->mbs; i++) {
29     if (jj[diag[i]] != i) {
30       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
31     }
32   }
33   PetscFunctionReturn(0);
34 }
35 
36 #undef __FUNC__
37 #define __FUNC__ "MatMarkDiagonal_SeqSBAIJ"
38 int MatMarkDiagonal_SeqSBAIJ(Mat A)
39 {
40   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
41   int          i,j,*diag,m = a->mbs,ierr;
42 
43   PetscFunctionBegin;
44   if (a->diag) PetscFunctionReturn(0);
45 
46   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
47   PetscLogObjectMemory(A,(m+1)*sizeof(int));
48   for (i=0; i<m; i++) {
49     diag[i] = a->i[i+1];
50     for (j=a->i[i]; j<a->i[i+1]; j++) {
51       if (a->j[j] == i) {
52         diag[i] = j;
53         break;
54       }
55     }
56   }
57   a->diag = diag;
58   PetscFunctionReturn(0);
59 }
60 
61 
62 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
63 
64 #undef __FUNC__
65 #define __FUNC__ "MatGetRowIJ_SeqSBAIJ"
66 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
67 {
68   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
69 
70   PetscFunctionBegin;
71   if (ia) {
72     SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
73   }
74   *nn = a->mbs;
75   PetscFunctionReturn(0);
76 }
77 
78 #undef __FUNC__
79 #define __FUNC__ "MatRestoreRowIJ_SeqSBAIJ"
80 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
81 {
82   PetscFunctionBegin;
83   if (!ia) PetscFunctionReturn(0);
84   SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
85   /* PetscFunctionReturn(0); */
86 }
87 
88 #undef __FUNC__
89 #define __FUNC__ "MatGetBlockSize_SeqSBAIJ"
90 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
91 {
92   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
93 
94   PetscFunctionBegin;
95   *bs = sbaij->bs;
96   PetscFunctionReturn(0);
97 }
98 
99 #undef __FUNC__
100 #define __FUNC__ "MatDestroy_SeqSBAIJ"
101 int MatDestroy_SeqSBAIJ(Mat A)
102 {
103   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
104   int         ierr;
105 
106   PetscFunctionBegin;
107 #if defined(PETSC_USE_LOG)
108   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
109 #endif
110   ierr = PetscFree(a->a);CHKERRQ(ierr);
111   if (!a->singlemalloc) {
112     ierr = PetscFree(a->i);CHKERRQ(ierr);
113     ierr = PetscFree(a->j);CHKERRQ(ierr);
114   }
115   if (a->row) {
116     ierr = ISDestroy(a->row);CHKERRQ(ierr);
117   }
118   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
119   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
120   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
121   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
122   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
125 
126   if (a->inew){
127     ierr = PetscFree(a->inew);CHKERRQ(ierr);
128     a->inew = 0;
129   }
130   ierr = PetscFree(a);CHKERRQ(ierr);
131   PetscFunctionReturn(0);
132 }
133 
134 #undef __FUNC__
135 #define __FUNC__ "MatSetOption_SeqSBAIJ"
136 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
137 {
138   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
139 
140   PetscFunctionBegin;
141   if      (op == MAT_ROW_ORIENTED)                 a->roworiented    = PETSC_TRUE;
142   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented    = PETSC_FALSE;
143   else if (op == MAT_COLUMNS_SORTED)               a->sorted         = PETSC_TRUE;
144   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted         = PETSC_FALSE;
145   else if (op == MAT_KEEP_ZEROED_ROWS)             a->keepzeroedrows = PETSC_TRUE;
146   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew          = 1;
147   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew          = -1;
148   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew          = -2;
149   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew          = 0;
150   else if (op == MAT_ROWS_SORTED ||
151            op == MAT_ROWS_UNSORTED ||
152            op == MAT_SYMMETRIC ||
153            op == MAT_STRUCTURALLY_SYMMETRIC ||
154            op == MAT_YES_NEW_DIAGONALS ||
155            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
156            op == MAT_USE_HASH_TABLE) {
157     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
158   } else if (op == MAT_NO_NEW_DIAGONALS) {
159     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
160   } else {
161     SETERRQ(PETSC_ERR_SUP,"unknown option");
162   }
163   PetscFunctionReturn(0);
164 }
165 
166 #undef __FUNC__
167 #define __FUNC__ "MatGetOwnershipRange_SeqSBAIJ"
168 int MatGetOwnershipRange_SeqSBAIJ(Mat A,int *m,int *n)
169 {
170   PetscFunctionBegin;
171   *m = 0;
172   *n = A->m;
173   PetscFunctionReturn(0);
174 }
175 
176 #undef __FUNC__
177 #define __FUNC__ "MatGetRow_SeqSBAIJ"
178 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,Scalar **v)
179 {
180   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
181   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
182   MatScalar    *aa,*aa_i;
183   Scalar       *v_i;
184 
185   PetscFunctionBegin;
186   bs  = a->bs;
187   ai  = a->i;
188   aj  = a->j;
189   aa  = a->a;
190   bs2 = a->bs2;
191 
192   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
193 
194   bn  = row/bs;   /* Block number */
195   bp  = row % bs; /* Block position */
196   M   = ai[bn+1] - ai[bn];
197   *ncols = bs*M;
198 
199   if (v) {
200     *v = 0;
201     if (*ncols) {
202       ierr = PetscMalloc((*ncols+row)*sizeof(Scalar),v);CHKERRQ(ierr);
203       for (i=0; i<M; i++) { /* for each block in the block row */
204         v_i  = *v + i*bs;
205         aa_i = aa + bs2*(ai[bn] + i);
206         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
207       }
208     }
209   }
210 
211   if (cols) {
212     *cols = 0;
213     if (*ncols) {
214       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
215       for (i=0; i<M; i++) { /* for each block in the block row */
216         cols_i = *cols + i*bs;
217         itmp  = bs*aj[ai[bn] + i];
218         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
219       }
220     }
221   }
222 
223   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
224   /* this segment is currently removed, so only entries in the upper triangle are obtained */
225 #ifdef column_search
226   v_i    = *v    + M*bs;
227   cols_i = *cols + M*bs;
228   for (i=0; i<bn; i++){ /* for each block row */
229     M = ai[i+1] - ai[i];
230     for (j=0; j<M; j++){
231       itmp = aj[ai[i] + j];    /* block column value */
232       if (itmp == bn){
233         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
234         for (k=0; k<bs; k++) {
235           *cols_i++ = i*bs+k;
236           *v_i++    = aa_i[k];
237         }
238         *ncols += bs;
239         break;
240       }
241     }
242   }
243 #endif
244 
245   PetscFunctionReturn(0);
246 }
247 
248 #undef __FUNC__
249 #define __FUNC__ "MatRestoreRow_SeqSBAIJ"
250 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
251 {
252   int ierr;
253 
254   PetscFunctionBegin;
255   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
256   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
257   PetscFunctionReturn(0);
258 }
259 
260 #undef __FUNC__
261 #define __FUNC__ "MatTranspose_SeqSBAIJ"
262 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
263 {
264   PetscFunctionBegin;
265   SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called");
266   /* PetscFunctionReturn(0); */
267 }
268 
269 #undef __FUNC__
270 #define __FUNC__ "MatView_SeqSBAIJ_Binary"
271 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
272 {
273   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
274   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
275   Scalar      *aa;
276   FILE        *file;
277 
278   PetscFunctionBegin;
279   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
280   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
281   col_lens[0] = MAT_COOKIE;
282 
283   col_lens[1] = A->m;
284   col_lens[2] = A->m;
285   col_lens[3] = a->s_nz*bs2;
286 
287   /* store lengths of each row and write (including header) to file */
288   count = 0;
289   for (i=0; i<a->mbs; i++) {
290     for (j=0; j<bs; j++) {
291       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
292     }
293   }
294   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
295   ierr = PetscFree(col_lens);CHKERRQ(ierr);
296 
297   /* store column indices (zero start index) */
298   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
299   count = 0;
300   for (i=0; i<a->mbs; i++) {
301     for (j=0; j<bs; j++) {
302       for (k=a->i[i]; k<a->i[i+1]; k++) {
303         for (l=0; l<bs; l++) {
304           jj[count++] = bs*a->j[k] + l;
305         }
306       }
307     }
308   }
309   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
310   ierr = PetscFree(jj);CHKERRQ(ierr);
311 
312   /* store nonzero values */
313   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr);
314   count = 0;
315   for (i=0; i<a->mbs; i++) {
316     for (j=0; j<bs; j++) {
317       for (k=a->i[i]; k<a->i[i+1]; k++) {
318         for (l=0; l<bs; l++) {
319           aa[count++] = a->a[bs2*k + l*bs + j];
320         }
321       }
322     }
323   }
324   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
325   ierr = PetscFree(aa);CHKERRQ(ierr);
326 
327   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
328   if (file) {
329     fprintf(file,"-matload_block_size %d\n",a->bs);
330   }
331   PetscFunctionReturn(0);
332 }
333 
334 #undef __FUNC__
335 #define __FUNC__ "MatView_SeqSBAIJ_ASCII"
336 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
337 {
338   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
339   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
340   char              *name;
341   PetscViewerFormat format;
342 
343   PetscFunctionBegin;
344   ierr = PetscObjectGetName((PetscObject)viewer,&name);CHKERRQ(ierr);
345   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
346   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
347     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
348   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
349     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
350   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
351     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
352     for (i=0; i<a->mbs; i++) {
353       for (j=0; j<bs; j++) {
354         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
355         for (k=a->i[i]; k<a->i[i+1]; k++) {
356           for (l=0; l<bs; l++) {
357 #if defined(PETSC_USE_COMPLEX)
358             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
359               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
360                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
361             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
362               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
363                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
364             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
365               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
366             }
367 #else
368             if (a->a[bs2*k + l*bs + j] != 0.0) {
369               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
370             }
371 #endif
372           }
373         }
374         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
375       }
376     }
377     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
378   } else {
379     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
380     for (i=0; i<a->mbs; i++) {
381       for (j=0; j<bs; j++) {
382         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
383         for (k=a->i[i]; k<a->i[i+1]; k++) {
384           for (l=0; l<bs; l++) {
385 #if defined(PETSC_USE_COMPLEX)
386             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
387               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
388                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
389             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
390               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
391                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
392             } else {
393               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
394             }
395 #else
396             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
397 #endif
398           }
399         }
400         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
401       }
402     }
403     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
404   }
405   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNC__
410 #define __FUNC__ "MatView_SeqSBAIJ_Draw_Zoom"
411 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
412 {
413   Mat          A = (Mat) Aa;
414   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)A->data;
415   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
416   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
417   MatScalar    *aa;
418   MPI_Comm     comm;
419   PetscViewer       viewer;
420 
421   PetscFunctionBegin;
422   /*
423       This is nasty. If this is called from an originally parallel matrix
424    then all processes call this,but only the first has the matrix so the
425    rest should return immediately.
426   */
427   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
428   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
429   if (rank) PetscFunctionReturn(0);
430 
431   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
432 
433   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
434   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
435 
436   /* loop over matrix elements drawing boxes */
437   color = PETSC_DRAW_BLUE;
438   for (i=0,row=0; i<mbs; i++,row+=bs) {
439     for (j=a->i[i]; j<a->i[i+1]; j++) {
440       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
441       x_l = a->j[j]*bs; x_r = x_l + 1.0;
442       aa = a->a + j*bs2;
443       for (k=0; k<bs; k++) {
444         for (l=0; l<bs; l++) {
445           if (PetscRealPart(*aa++) >=  0.) continue;
446           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
447         }
448       }
449     }
450   }
451   color = PETSC_DRAW_CYAN;
452   for (i=0,row=0; i<mbs; i++,row+=bs) {
453     for (j=a->i[i]; j<a->i[i+1]; j++) {
454       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
455       x_l = a->j[j]*bs; x_r = x_l + 1.0;
456       aa = a->a + j*bs2;
457       for (k=0; k<bs; k++) {
458         for (l=0; l<bs; l++) {
459           if (PetscRealPart(*aa++) != 0.) continue;
460           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
461         }
462       }
463     }
464   }
465 
466   color = PETSC_DRAW_RED;
467   for (i=0,row=0; i<mbs; i++,row+=bs) {
468     for (j=a->i[i]; j<a->i[i+1]; j++) {
469       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
470       x_l = a->j[j]*bs; x_r = x_l + 1.0;
471       aa = a->a + j*bs2;
472       for (k=0; k<bs; k++) {
473         for (l=0; l<bs; l++) {
474           if (PetscRealPart(*aa++) <= 0.) continue;
475           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
476         }
477       }
478     }
479   }
480   PetscFunctionReturn(0);
481 }
482 
483 #undef __FUNC__
484 #define __FUNC__ "MatView_SeqSBAIJ_Draw"
485 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
486 {
487   int          ierr;
488   PetscReal    xl,yl,xr,yr,w,h;
489   PetscDraw         draw;
490   PetscTruth   isnull;
491 
492   PetscFunctionBegin;
493 
494   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
495   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
496 
497   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
498   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
499   xr += w;    yr += h;  xl = -w;     yl = -h;
500   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
501   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
502   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
503   PetscFunctionReturn(0);
504 }
505 
506 #undef __FUNC__
507 #define __FUNC__ "MatView_SeqSBAIJ"
508 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
509 {
510   int        ierr;
511   PetscTruth issocket,isascii,isbinary,isdraw;
512 
513   PetscFunctionBegin;
514   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
515   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
516   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
517   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
518   if (issocket) {
519     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
520   } else if (isascii){
521     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
522   } else if (isbinary) {
523     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
524   } else if (isdraw) {
525     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
526   } else {
527     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
528   }
529   PetscFunctionReturn(0);
530 }
531 
532 
533 #undef __FUNC__
534 #define __FUNC__ "MatGetValues_SeqSBAIJ"
535 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
536 {
537   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
538   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
539   int        *ai = a->i,*ailen = a->ilen;
540   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
541   MatScalar  *ap,*aa = a->a,zero = 0.0;
542 
543   PetscFunctionBegin;
544   for (k=0; k<m; k++) { /* loop over rows */
545     row  = im[k]; brow = row/bs;
546     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
547     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
548     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
549     nrow = ailen[brow];
550     for (l=0; l<n; l++) { /* loop over columns */
551       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
552       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
553       col  = in[l] ;
554       bcol = col/bs;
555       cidx = col%bs;
556       ridx = row%bs;
557       high = nrow;
558       low  = 0; /* assume unsorted */
559       while (high-low > 5) {
560         t = (low+high)/2;
561         if (rp[t] > bcol) high = t;
562         else             low  = t;
563       }
564       for (i=low; i<high; i++) {
565         if (rp[i] > bcol) break;
566         if (rp[i] == bcol) {
567           *v++ = ap[bs2*i+bs*cidx+ridx];
568           goto finished;
569         }
570       }
571       *v++ = zero;
572       finished:;
573     }
574   }
575   PetscFunctionReturn(0);
576 }
577 
578 
579 #undef __FUNC__
580 #define __FUNC__ "MatSetValuesBlocked_SeqSBAIJ"
581 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
582 {
583   PetscFunctionBegin;
584   SETERRQ(1,"Function not yet written for SBAIJ format");
585   /* PetscFunctionReturn(0); */
586 }
587 
588 #undef __FUNC__
589 #define __FUNC__ "MatAssemblyEnd_SeqSBAIJ"
590 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
591 {
592   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
593   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
594   int        m = A->m,*ip,N,*ailen = a->ilen;
595   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
596   MatScalar  *aa = a->a,*ap;
597 
598   PetscFunctionBegin;
599   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
600 
601   if (m) rmax = ailen[0];
602   for (i=1; i<mbs; i++) {
603     /* move each row back by the amount of empty slots (fshift) before it*/
604     fshift += imax[i-1] - ailen[i-1];
605     rmax   = PetscMax(rmax,ailen[i]);
606     if (fshift) {
607       ip = aj + ai[i]; ap = aa + bs2*ai[i];
608       N = ailen[i];
609       for (j=0; j<N; j++) {
610         ip[j-fshift] = ip[j];
611         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
612       }
613     }
614     ai[i] = ai[i-1] + ailen[i-1];
615   }
616   if (mbs) {
617     fshift += imax[mbs-1] - ailen[mbs-1];
618     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
619   }
620   /* reset ilen and imax for each row */
621   for (i=0; i<mbs; i++) {
622     ailen[i] = imax[i] = ai[i+1] - ai[i];
623   }
624   a->s_nz = ai[mbs];
625 
626   /* diagonals may have moved, so kill the diagonal pointers */
627   if (fshift && a->diag) {
628     ierr = PetscFree(a->diag);CHKERRQ(ierr);
629     PetscLogObjectMemory(A,-(m+1)*sizeof(int));
630     a->diag = 0;
631   }
632   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
633            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
634   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
635            a->reallocs);
636   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
637   a->reallocs          = 0;
638   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
639 
640   PetscFunctionReturn(0);
641 }
642 
643 /*
644    This function returns an array of flags which indicate the locations of contiguous
645    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
646    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
647    Assume: sizes should be long enough to hold all the values.
648 */
649 #undef __FUNC__
650 #define __FUNC__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
651 static int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
652 {
653   int        i,j,k,row;
654   PetscTruth flg;
655 
656   PetscFunctionBegin;
657   for (i=0,j=0; i<n; j++) {
658     row = idx[i];
659     if (row%bs!=0) { /* Not the begining of a block */
660       sizes[j] = 1;
661       i++;
662     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
663       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
664       i++;
665     } else { /* Begining of the block, so check if the complete block exists */
666       flg = PETSC_TRUE;
667       for (k=1; k<bs; k++) {
668         if (row+k != idx[i+k]) { /* break in the block */
669           flg = PETSC_FALSE;
670           break;
671         }
672       }
673       if (flg == PETSC_TRUE) { /* No break in the bs */
674         sizes[j] = bs;
675         i+= bs;
676       } else {
677         sizes[j] = 1;
678         i++;
679       }
680     }
681   }
682   *bs_max = j;
683   PetscFunctionReturn(0);
684 }
685 
686 #undef __FUNC__
687 #define __FUNC__ "MatZeroRows_SeqSBAIJ"
688 int MatZeroRows_SeqSBAIJ(Mat A,IS is,Scalar *diag)
689 {
690   Mat_SeqSBAIJ *sbaij=(Mat_SeqSBAIJ*)A->data;
691   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
692   int         bs=sbaij->bs,bs2=sbaij->bs2,*sizes,row,bs_max;
693   Scalar      zero = 0.0;
694   MatScalar   *aa;
695 
696   PetscFunctionBegin;
697   /* Make a copy of the IS and  sort it */
698   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
699   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
700 
701   /* allocate memory for rows,sizes */
702   ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
703   sizes = rows + is_n;
704 
705   /* initialize copy IS values to rows, and sort them */
706   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
707   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
708   if (sbaij->keepzeroedrows) { /* do not change nonzero structure */
709     for (i=0; i<is_n; i++) { sizes[i] = 1; } /* sizes: size of blocks, = 1 or bs */
710     bs_max = is_n;            /* bs_max: num. of contiguous block row in the row */
711   } else {
712     ierr = MatZeroRows_SeqSBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
713   }
714   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
715 
716   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
717     row   = rows[j];                  /* row to be zeroed */
718     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
719     count = (sbaij->i[row/bs +1] - sbaij->i[row/bs])*bs; /* num. of elements in the row */
720     aa    = sbaij->a + sbaij->i[row/bs]*bs2 + (row%bs);
721     if (sizes[i] == bs && !sbaij->keepzeroedrows) {
722       if (diag) {
723         if (sbaij->ilen[row/bs] > 0) {
724           sbaij->ilen[row/bs] = 1;
725           sbaij->j[sbaij->i[row/bs]] = row/bs;
726           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
727         }
728         /* Now insert all the diagoanl values for this bs */
729         for (k=0; k<bs; k++) {
730           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
731         }
732       } else { /* (!diag) */
733         sbaij->ilen[row/bs] = 0;
734       } /* end (!diag) */
735     } else { /* (sizes[i] != bs), broken block */
736 #if defined (PETSC_USE_DEBUG)
737       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
738 #endif
739       for (k=0; k<count; k++) {
740         aa[0] = zero;
741         aa+=bs;
742       }
743       if (diag) {
744         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
745       }
746     }
747   }
748 
749   ierr = PetscFree(rows);CHKERRQ(ierr);
750   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
752   PetscFunctionReturn(0);
753 }
754 
755 /* Only add/insert a(i,j) with i<=j (blocks).
756    Any a(i,j) with i>j input by user is ingored.
757 */
758 
759 #undef __FUNC__
760 #define __FUNC__ "MatSetValues_SeqSBAIJ"
761 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
762 {
763   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
764   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
765   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
766   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
767   int         ridx,cidx,bs2=a->bs2,ierr;
768   MatScalar   *ap,value,*aa=a->a,*bap;
769 
770   PetscFunctionBegin;
771 
772   for (k=0; k<m; k++) { /* loop over added rows */
773     row  = im[k];       /* row number */
774     brow = row/bs;      /* block row number */
775     if (row < 0) continue;
776 #if defined(PETSC_USE_BOPT_g)
777     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
778 #endif
779     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
780     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
781     rmax = imax[brow];  /* maximum space allocated for this row */
782     nrow = ailen[brow]; /* actual length of this row */
783     low  = 0;
784 
785     for (l=0; l<n; l++) { /* loop over added columns */
786       if (in[l] < 0) continue;
787 #if defined(PETSC_USE_BOPT_g)
788       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m);
789 #endif
790       col = in[l];
791       bcol = col/bs;              /* block col number */
792 
793       if (brow <= bcol){
794         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
795         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
796           /* element value a(k,l) */
797           if (roworiented) {
798             value = v[l + k*n];
799           } else {
800             value = v[k + l*m];
801           }
802 
803           /* move pointer bap to a(k,l) quickly and add/insert value */
804           if (!sorted) low = 0; high = nrow;
805           while (high-low > 7) {
806             t = (low+high)/2;
807             if (rp[t] > bcol) high = t;
808             else              low  = t;
809           }
810           for (i=low; i<high; i++) {
811             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
812             if (rp[i] > bcol) break;
813             if (rp[i] == bcol) {
814               bap  = ap +  bs2*i + bs*cidx + ridx;
815               if (is == ADD_VALUES) *bap += value;
816               else                  *bap  = value;
817               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
818               if (brow == bcol && ridx < cidx){
819                 bap  = ap +  bs2*i + bs*ridx + cidx;
820                 if (is == ADD_VALUES) *bap += value;
821                 else                  *bap  = value;
822               }
823               goto noinsert1;
824             }
825           }
826 
827       if (nonew == 1) goto noinsert1;
828       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
829       if (nrow >= rmax) {
830         /* there is no extra room in row, therefore enlarge */
831         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
832         MatScalar *new_a;
833 
834         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
835 
836         /* Malloc new storage space */
837         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
838         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
839         new_j = (int*)(new_a + bs2*new_nz);
840         new_i = new_j + new_nz;
841 
842         /* copy over old data into new slots */
843         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
844         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
845         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
846         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
847         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
848         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
849         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
850         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
851         /* free up old matrix storage */
852         ierr = PetscFree(a->a);CHKERRQ(ierr);
853         if (!a->singlemalloc) {
854           ierr = PetscFree(a->i);CHKERRQ(ierr);
855           ierr = PetscFree(a->j);CHKERRQ(ierr);
856         }
857         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
858         a->singlemalloc = PETSC_TRUE;
859 
860         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
861         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
862         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
863         a->s_maxnz += bs2*CHUNKSIZE;
864         a->reallocs++;
865         a->s_nz++;
866       }
867 
868       N = nrow++ - 1;
869       /* shift up all the later entries in this row */
870       for (ii=N; ii>=i; ii--) {
871         rp[ii+1] = rp[ii];
872         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
873       }
874       if (N>=i) {
875         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
876       }
877       rp[i]                      = bcol;
878       ap[bs2*i + bs*cidx + ridx] = value;
879       noinsert1:;
880       low = i;
881       /* } */
882         }
883       } /* end of if .. if..  */
884     }                     /* end of loop over added columns */
885     ailen[brow] = nrow;
886   }                       /* end of loop over added rows */
887 
888   PetscFunctionReturn(0);
889 }
890 
891 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*);
892 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal);
893 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int);
894 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
895 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
896 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
897 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
898 extern int MatScale_SeqSBAIJ(Scalar*,Mat);
899 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
900 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
901 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
902 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
903 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
904 extern int MatZeroEntries_SeqSBAIJ(Mat);
905 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
906 
907 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
908 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
909 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
910 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
911 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
912 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
913 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
914 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
915 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
916 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
917 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
918 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
919 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
920 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
921 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
922 
923 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
924 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
925 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
926 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
927 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
928 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
929 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
930 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
931 
932 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
933 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
934 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
935 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
936 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
937 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
938 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
939 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
940 
941 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
942 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
943 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
944 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
945 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
946 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
947 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
948 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
949 
950 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
951 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
952 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
953 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
954 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
955 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
956 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
957 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
958 
959 #ifdef HAVE_MatIncompleteCholeskyFactor
960 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
961 #undef __FUNC__
962 #define __FUNC__ "MatIncompleteCholeskyFactor_SeqSBAIJ"
963 int MatIncompleteCholeskyFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level)
964 {
965   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
966   Mat         outA;
967   int         ierr;
968   PetscTruth  row_identity,col_identity;
969 
970   PetscFunctionBegin;
971   /*
972   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
973   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
974   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
975   if (!row_identity || !col_identity) {
976     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
977   }
978   */
979 
980   outA          = inA;
981   inA->factor   = FACTOR_CHOLESKY;
982 
983   if (!a->diag) {
984     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
985   }
986   /*
987       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
988       for ILU(0) factorization with natural ordering
989   */
990   switch (a->bs) {
991   case 1:
992     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
993     PetscLoginfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
994   case 2:
995     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
996     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
997     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
998     PetscLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
999     break;
1000   case 3:
1001     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1002     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1003     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1004     PetscLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1005     break;
1006   case 4:
1007     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1008     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1009     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1010     PetscLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1011     break;
1012   case 5:
1013     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1014     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1015     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1016     PetscLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1017     break;
1018   case 6:
1019     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1020     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1021     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1022     PetscLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1023     break;
1024   case 7:
1025     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1026     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1027     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1028     PetscLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1029     break;
1030   default:
1031     a->row        = row;
1032     a->icol       = col;
1033     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1034     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1035 
1036     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1037     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1038     PetscLogObjectParent(inA,a->icol);
1039 
1040     if (!a->solve_work) {
1041       ierr = PetscMalloc((A->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr);
1042       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(Scalar));
1043     }
1044   }
1045 
1046   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1047 
1048   PetscFunctionReturn(0);
1049 }
1050 #endif
1051 
1052 #undef __FUNC__
1053 #define __FUNC__ "MatPrintHelp_SeqSBAIJ"
1054 int MatPrintHelp_SeqSBAIJ(Mat A)
1055 {
1056   static PetscTruth called = PETSC_FALSE;
1057   MPI_Comm          comm = A->comm;
1058   int               ierr;
1059 
1060   PetscFunctionBegin;
1061   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1062   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1063   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 EXTERN_C_BEGIN
1068 #undef __FUNC__
1069 #define __FUNC__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1070 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1071 {
1072   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1073   int         i,nz,n;
1074 
1075   PetscFunctionBegin;
1076   nz = baij->s_maxnz;
1077   n  = mat->n;
1078   for (i=0; i<nz; i++) {
1079     baij->j[i] = indices[i];
1080   }
1081   baij->s_nz = nz;
1082   for (i=0; i<n; i++) {
1083     baij->ilen[i] = baij->imax[i];
1084   }
1085 
1086   PetscFunctionReturn(0);
1087 }
1088 EXTERN_C_END
1089 
1090 #undef __FUNC__
1091 #define __FUNC__ "MatSeqSBAIJSetColumnIndices"
1092 /*@
1093     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1094        in the matrix.
1095 
1096   Input Parameters:
1097 +  mat     - the SeqSBAIJ matrix
1098 -  indices - the column indices
1099 
1100   Level: advanced
1101 
1102   Notes:
1103     This can be called if you have precomputed the nonzero structure of the
1104   matrix and want to provide it to the matrix object to improve the performance
1105   of the MatSetValues() operation.
1106 
1107     You MUST have set the correct numbers of nonzeros per row in the call to
1108   MatCreateSeqSBAIJ().
1109 
1110     MUST be called before any calls to MatSetValues()
1111 
1112 .seealso: MatCreateSeqSBAIJ
1113 @*/
1114 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1115 {
1116   int ierr,(*f)(Mat,int *);
1117 
1118   PetscFunctionBegin;
1119   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1120   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr);
1121   if (f) {
1122     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1123   } else {
1124     SETERRQ(1,"Wrong type of matrix to set column indices");
1125   }
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNC__
1130 #define __FUNC__ "MatSetUpPreallocation_SeqSBAIJ"
1131 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1132 {
1133   int        ierr;
1134 
1135   PetscFunctionBegin;
1136   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 /* -------------------------------------------------------------------*/
1141 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1142        MatGetRow_SeqSBAIJ,
1143        MatRestoreRow_SeqSBAIJ,
1144        MatMult_SeqSBAIJ_N,
1145        MatMultAdd_SeqSBAIJ_N,
1146        MatMultTranspose_SeqSBAIJ,
1147        MatMultTransposeAdd_SeqSBAIJ,
1148        MatSolve_SeqSBAIJ_N,
1149        0,
1150        0,
1151        0,
1152        0,
1153        MatCholeskyFactor_SeqSBAIJ,
1154        0,
1155        MatTranspose_SeqSBAIJ,
1156        MatGetInfo_SeqSBAIJ,
1157        MatEqual_SeqSBAIJ,
1158        MatGetDiagonal_SeqSBAIJ,
1159        MatDiagonalScale_SeqSBAIJ,
1160        MatNorm_SeqSBAIJ,
1161        0,
1162        MatAssemblyEnd_SeqSBAIJ,
1163        0,
1164        MatSetOption_SeqSBAIJ,
1165        MatZeroEntries_SeqSBAIJ,
1166        MatZeroRows_SeqSBAIJ,
1167        0,
1168        0,
1169        MatCholeskyFactorSymbolic_SeqSBAIJ,
1170        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1171        MatSetUpPreallocation_SeqSBAIJ,
1172        0,
1173        MatGetOwnershipRange_SeqSBAIJ,
1174        0,
1175        MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ,
1176        0,
1177        0,
1178        MatDuplicate_SeqSBAIJ,
1179        0,
1180        0,
1181        0,
1182        0,
1183        0,
1184        MatGetSubMatrices_SeqSBAIJ,
1185        MatIncreaseOverlap_SeqSBAIJ,
1186        MatGetValues_SeqSBAIJ,
1187        0,
1188        MatPrintHelp_SeqSBAIJ,
1189        MatScale_SeqSBAIJ,
1190        0,
1191        0,
1192        0,
1193        MatGetBlockSize_SeqSBAIJ,
1194        MatGetRowIJ_SeqSBAIJ,
1195        MatRestoreRowIJ_SeqSBAIJ,
1196        0,
1197        0,
1198        0,
1199        0,
1200        0,
1201        0,
1202        MatSetValuesBlocked_SeqSBAIJ,
1203        MatGetSubMatrix_SeqSBAIJ,
1204        0,
1205        0,
1206        MatGetMaps_Petsc,
1207        0,
1208        0,
1209        0,
1210        0,
1211        0,
1212        0,
1213        MatGetRowMax_SeqSBAIJ};
1214 
1215 EXTERN_C_BEGIN
1216 #undef __FUNC__
1217 #define __FUNC__ "MatStoreValues_SeqSBAIJ"
1218 int MatStoreValues_SeqSBAIJ(Mat mat)
1219 {
1220   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1221   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1222   int         ierr;
1223 
1224   PetscFunctionBegin;
1225   if (aij->nonew != 1) {
1226     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1227   }
1228 
1229   /* allocate space for values if not already there */
1230   if (!aij->saved_values) {
1231     ierr = PetscMalloc(nz*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr);
1232   }
1233 
1234   /* copy values over */
1235   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1236   PetscFunctionReturn(0);
1237 }
1238 EXTERN_C_END
1239 
1240 EXTERN_C_BEGIN
1241 #undef __FUNC__
1242 #define __FUNC__ "MatRetrieveValues_SeqSBAIJ"
1243 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1244 {
1245   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1246   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1247 
1248   PetscFunctionBegin;
1249   if (aij->nonew != 1) {
1250     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1251   }
1252   if (!aij->saved_values) {
1253     SETERRQ(1,"Must call MatStoreValues(A);first");
1254   }
1255 
1256   /* copy values over */
1257   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1258   PetscFunctionReturn(0);
1259 }
1260 EXTERN_C_END
1261 
1262 EXTERN_C_BEGIN
1263 #undef __FUNC__
1264 #define __FUNC__ "MatCreate_SeqSBAIJ"
1265 int MatCreate_SeqSBAIJ(Mat B)
1266 {
1267   Mat_SeqSBAIJ *b;
1268   int          ierr,size;
1269 
1270   PetscFunctionBegin;
1271   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1272   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1273   B->m = B->M = PetscMax(B->m,B->M);
1274   B->n = B->N = PetscMax(B->n,B->N);
1275 
1276   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1277   B->data = (void*)b;
1278   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1279   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1280   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1281   B->ops->view        = MatView_SeqSBAIJ;
1282   B->factor           = 0;
1283   B->lupivotthreshold = 1.0;
1284   B->mapping          = 0;
1285   b->row              = 0;
1286   b->icol             = 0;
1287   b->reallocs         = 0;
1288   b->saved_values     = 0;
1289 
1290   ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1291   ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1292 
1293   b->sorted           = PETSC_FALSE;
1294   b->roworiented      = PETSC_TRUE;
1295   b->nonew            = 0;
1296   b->diag             = 0;
1297   b->solve_work       = 0;
1298   b->mult_work        = 0;
1299   b->spptr            = 0;
1300   b->keepzeroedrows   = PETSC_FALSE;
1301 
1302   b->inew             = 0;
1303   b->jnew             = 0;
1304   b->anew             = 0;
1305   b->a2anew           = 0;
1306   b->permute          = PETSC_FALSE;
1307 
1308   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1309                                      "MatStoreValues_SeqSBAIJ",
1310                                      (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1311   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1312                                      "MatRetrieveValues_SeqSBAIJ",
1313                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1314   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1315                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1316                                      (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1317   PetscFunctionReturn(0);
1318 }
1319 EXTERN_C_END
1320 
1321 #undef __FUNC__
1322 #define __FUNC__ "MatSeqSBAIJSetPreallocation"
1323 /*@C
1324    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1325    compressed row) format.  For good matrix assembly performance the
1326    user should preallocate the matrix storage by setting the parameter nz
1327    (or the array nnz).  By setting these parameters accurately, performance
1328    during matrix assembly can be increased by more than a factor of 50.
1329 
1330    Collective on Mat
1331 
1332    Input Parameters:
1333 +  A - the symmetric matrix
1334 .  bs - size of block
1335 .  nz - number of block nonzeros per block row (same for all rows)
1336 -  nnz - array containing the number of block nonzeros in the various block rows
1337          (possibly different for each block row) or PETSC_NULL
1338 
1339    Options Database Keys:
1340 .   -mat_no_unroll - uses code that does not unroll the loops in the
1341                      block calculations (much slower)
1342 .    -mat_block_size - size of the blocks to use
1343 
1344    Level: intermediate
1345 
1346    Notes:
1347    The block AIJ format is compatible with standard Fortran 77
1348    storage.  That is, the stored row and column indices can begin at
1349    either one (as in Fortran) or zero.  See the users' manual for details.
1350 
1351    Specify the preallocated storage with either nz or nnz (not both).
1352    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1353    allocation.  For additional details, see the users manual chapter on
1354    matrices.
1355 
1356 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1357 @*/
1358 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1359 {
1360   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1361   int          i,len,ierr,mbs,bs2;
1362   PetscTruth   flg;
1363   int          s_nz;
1364 
1365   PetscFunctionBegin;
1366   B->preallocated = PETSC_TRUE;
1367   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1368   mbs  = B->m/bs;
1369   bs2  = bs*bs;
1370 
1371   if (mbs*bs != B->m) {
1372     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1373   }
1374 
1375   if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than -2: value %d",nz);
1376   if (nnz) {
1377     for (i=0; i<mbs; i++) {
1378       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1379     }
1380   }
1381 
1382   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1383   if (!flg) {
1384     switch (bs) {
1385     case 1:
1386       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1387       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1388       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1389       B->ops->mult            = MatMult_SeqSBAIJ_1;
1390       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1391       break;
1392     case 2:
1393       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1394       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1395       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1396       B->ops->mult            = MatMult_SeqSBAIJ_2;
1397       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1398       break;
1399     case 3:
1400       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1401       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1402       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1403       B->ops->mult            = MatMult_SeqSBAIJ_3;
1404       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1405       break;
1406     case 4:
1407       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1408       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1409       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1410       B->ops->mult            = MatMult_SeqSBAIJ_4;
1411       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1412       break;
1413     case 5:
1414       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1415       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1416       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1417       B->ops->mult            = MatMult_SeqSBAIJ_5;
1418       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1419       break;
1420     case 6:
1421       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1422       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1423       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1424       B->ops->mult            = MatMult_SeqSBAIJ_6;
1425       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1426       break;
1427     case 7:
1428       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1429       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1430       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1431       B->ops->mult            = MatMult_SeqSBAIJ_7;
1432       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1433       break;
1434     }
1435   }
1436 
1437   b->mbs = mbs;
1438   b->nbs = mbs;
1439   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1440   if (!nnz) {
1441     if (nz == PETSC_DEFAULT) nz = 5;
1442     else if (nz <= 0)        nz = 1;
1443     for (i=0; i<mbs; i++) {
1444       b->imax[i] = nz;
1445     }
1446     nz = nz*mbs; /* total nz */
1447   } else {
1448     nz = 0;
1449     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1450   }
1451   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1452   s_nz = nz;
1453 
1454   /* allocate the matrix space */
1455   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1456   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1457   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1458   b->j = (int*)(b->a + s_nz*bs2);
1459   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1460   b->i = b->j + s_nz;
1461   b->singlemalloc = PETSC_TRUE;
1462 
1463   /* pointer to beginning of each row */
1464   b->i[0] = 0;
1465   for (i=1; i<mbs+1; i++) {
1466     b->i[i] = b->i[i-1] + b->imax[i-1];
1467   }
1468 
1469   /* b->ilen will count nonzeros in each block row so far. */
1470   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);
1471   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1472   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1473 
1474   b->bs               = bs;
1475   b->bs2              = bs2;
1476   b->s_nz             = 0;
1477   b->s_maxnz          = s_nz*bs2;
1478 
1479   b->inew             = 0;
1480   b->jnew             = 0;
1481   b->anew             = 0;
1482   b->a2anew           = 0;
1483   b->permute          = PETSC_FALSE;
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 
1488 #undef __FUNC__
1489 #define __FUNC__ "MatCreateSeqSBAIJ"
1490 /*@C
1491    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1492    compressed row) format.  For good matrix assembly performance the
1493    user should preallocate the matrix storage by setting the parameter nz
1494    (or the array nnz).  By setting these parameters accurately, performance
1495    during matrix assembly can be increased by more than a factor of 50.
1496 
1497    Collective on MPI_Comm
1498 
1499    Input Parameters:
1500 +  comm - MPI communicator, set to PETSC_COMM_SELF
1501 .  bs - size of block
1502 .  m - number of rows, or number of columns
1503 .  nz - number of block nonzeros per block row (same for all rows)
1504 -  nnz - array containing the number of block nonzeros in the various block rows
1505          (possibly different for each block row) or PETSC_NULL
1506 
1507    Output Parameter:
1508 .  A - the symmetric matrix
1509 
1510    Options Database Keys:
1511 .   -mat_no_unroll - uses code that does not unroll the loops in the
1512                      block calculations (much slower)
1513 .    -mat_block_size - size of the blocks to use
1514 
1515    Level: intermediate
1516 
1517    Notes:
1518    The block AIJ format is compatible with standard Fortran 77
1519    storage.  That is, the stored row and column indices can begin at
1520    either one (as in Fortran) or zero.  See the users' manual for details.
1521 
1522    Specify the preallocated storage with either nz or nnz (not both).
1523    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1524    allocation.  For additional details, see the users manual chapter on
1525    matrices.
1526 
1527 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1528 @*/
1529 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1530 {
1531   int ierr;
1532 
1533   PetscFunctionBegin;
1534   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1535   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1536   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 #undef __FUNC__
1541 #define __FUNC__ "MatDuplicate_SeqSBAIJ"
1542 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1543 {
1544   Mat         C;
1545   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1546   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1547 
1548   PetscFunctionBegin;
1549   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1550 
1551   *B = 0;
1552   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1553   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1554   c    = (Mat_SeqSBAIJ*)C->data;
1555 
1556   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1557   C->preallocated   = PETSC_TRUE;
1558   C->factor         = A->factor;
1559   c->row            = 0;
1560   c->icol           = 0;
1561   c->saved_values   = 0;
1562   c->keepzeroedrows = a->keepzeroedrows;
1563   C->assembled      = PETSC_TRUE;
1564 
1565   c->bs         = a->bs;
1566   c->bs2        = a->bs2;
1567   c->mbs        = a->mbs;
1568   c->nbs        = a->nbs;
1569 
1570   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1571   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1572   for (i=0; i<mbs; i++) {
1573     c->imax[i] = a->imax[i];
1574     c->ilen[i] = a->ilen[i];
1575   }
1576 
1577   /* allocate the matrix space */
1578   c->singlemalloc = PETSC_TRUE;
1579   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1580   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1581   c->j = (int*)(c->a + nz*bs2);
1582   c->i = c->j + nz;
1583   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1584   if (mbs > 0) {
1585     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1586     if (cpvalues == MAT_COPY_VALUES) {
1587       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1588     } else {
1589       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1590     }
1591   }
1592 
1593   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1594   c->sorted      = a->sorted;
1595   c->roworiented = a->roworiented;
1596   c->nonew       = a->nonew;
1597 
1598   if (a->diag) {
1599     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1600     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1601     for (i=0; i<mbs; i++) {
1602       c->diag[i] = a->diag[i];
1603     }
1604   } else c->diag        = 0;
1605   c->s_nz               = a->s_nz;
1606   c->s_maxnz            = a->s_maxnz;
1607   c->solve_work         = 0;
1608   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1609   c->mult_work          = 0;
1610   *B = C;
1611   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1612   PetscFunctionReturn(0);
1613 }
1614 
1615 EXTERN_C_BEGIN
1616 #undef __FUNC__
1617 #define __FUNC__ "MatLoad_SeqSBAIJ"
1618 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1619 {
1620   Mat_SeqSBAIJ  *a;
1621   Mat          B;
1622   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1623   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,*s_browlengths,maskcount;
1624   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1625   int          *masked,nmask,tmp,bs2,ishift;
1626   Scalar       *aa;
1627   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1628 
1629   PetscFunctionBegin;
1630   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1631   bs2  = bs*bs;
1632 
1633   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1634   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1635   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1636   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1637   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1638   M = header[1]; N = header[2]; nz = header[3];
1639 
1640   if (header[3] < 0) {
1641     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1642   }
1643 
1644   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1645 
1646   /*
1647      This code adds extra rows to make sure the number of rows is
1648     divisible by the blocksize
1649   */
1650   mbs        = M/bs;
1651   extra_rows = bs - M + bs*(mbs);
1652   if (extra_rows == bs) extra_rows = 0;
1653   else                  mbs++;
1654   if (extra_rows) {
1655     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1656   }
1657 
1658   /* read in row lengths */
1659   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1660   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1661   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1662 
1663   /* read in column indices */
1664   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1665   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1666   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1667 
1668   /* loop over row lengths determining block row lengths */
1669   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1670   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1671   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1672   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1673   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1674   masked   = mask + mbs;
1675   rowcount = 0; nzcount = 0;
1676   for (i=0; i<mbs; i++) {
1677     nmask = 0;
1678     for (j=0; j<bs; j++) {
1679       kmax = rowlengths[rowcount];
1680       for (k=0; k<kmax; k++) {
1681         tmp = jj[nzcount++]/bs;   /* block col. index */
1682         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1683       }
1684       rowcount++;
1685     }
1686     s_browlengths[i] += nmask;
1687     browlengths[i]    = 2*s_browlengths[i];
1688 
1689     /* zero out the mask elements we set */
1690     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1691   }
1692 
1693   /* create our matrix */
1694   ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1695   B = *A;
1696   a = (Mat_SeqSBAIJ*)B->data;
1697 
1698   /* set matrix "i" values */
1699   a->i[0] = 0;
1700   for (i=1; i<= mbs; i++) {
1701     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1702     a->ilen[i-1] = s_browlengths[i-1];
1703   }
1704   a->s_nz         = 0;
1705   for (i=0; i<mbs; i++) a->s_nz += s_browlengths[i];
1706 
1707   /* read in nonzero values */
1708   ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr);
1709   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1710   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1711 
1712   /* set "a" and "j" values into matrix */
1713   nzcount = 0; jcount = 0;
1714   for (i=0; i<mbs; i++) {
1715     nzcountb = nzcount;
1716     nmask    = 0;
1717     for (j=0; j<bs; j++) {
1718       kmax = rowlengths[i*bs+j];
1719       for (k=0; k<kmax; k++) {
1720         tmp = jj[nzcount++]/bs; /* block col. index */
1721         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1722       }
1723       rowcount++;
1724     }
1725     /* sort the masked values */
1726     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1727 
1728     /* set "j" values into matrix */
1729     maskcount = 1;
1730     for (j=0; j<nmask; j++) {
1731       a->j[jcount++]  = masked[j];
1732       mask[masked[j]] = maskcount++;
1733     }
1734 
1735     /* set "a" values into matrix */
1736     ishift = bs2*a->i[i];
1737     for (j=0; j<bs; j++) {
1738       kmax = rowlengths[i*bs+j];
1739       for (k=0; k<kmax; k++) {
1740         tmp       = jj[nzcountb]/bs ; /* block col. index */
1741         if (tmp >= i){
1742           block     = mask[tmp] - 1;
1743           point     = jj[nzcountb] - bs*tmp;
1744           idx       = ishift + bs2*block + j + bs*point;
1745           a->a[idx] = aa[nzcountb];
1746         }
1747         nzcountb++;
1748       }
1749     }
1750     /* zero out the mask elements we set */
1751     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1752   }
1753   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1754 
1755   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1756   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1757   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1758   ierr = PetscFree(aa);CHKERRQ(ierr);
1759   ierr = PetscFree(jj);CHKERRQ(ierr);
1760   ierr = PetscFree(mask);CHKERRQ(ierr);
1761 
1762   B->assembled = PETSC_TRUE;
1763   ierr = MatView_Private(B);CHKERRQ(ierr);
1764   PetscFunctionReturn(0);
1765 }
1766 EXTERN_C_END
1767 
1768 
1769 
1770