xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision f37cbb0890cfcc075bf42ae2b4760d76e6f1168d)
1 
2 /*
3     Defines the basic matrix operations for the SBAIJ (compressed row)
4   matrix storage format.
5 */
6 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
7 #include "src/inline/spops.h"
8 #include "src/mat/impls/sbaij/seq/sbaij.h"
9 
10 #define CHUNKSIZE  10
11 
12 /*
13      Checks for missing diagonals
14 */
15 #undef __FUNCT__
16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A)
18 {
19   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
20   PetscErrorCode ierr;
21   int         *diag,*jj = a->j,i;
22 
23   PetscFunctionBegin;
24   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
25   diag = a->diag;
26   for (i=0; i<a->mbs; i++) {
27     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
28   }
29   PetscFunctionReturn(0);
30 }
31 
32 #undef __FUNCT__
33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
35 {
36   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
37   PetscErrorCode ierr;
38   int          i,mbs = a->mbs;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int nz = a->i[n];
63     for (i=0; i<nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int nz = a->i[n]-1;
84     for (i=0; i<nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 PetscErrorCode MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   PetscErrorCode ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, NZ=%d",A->m,a->nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
125   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
126   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
127   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
128   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
129 
130   if (a->inew){
131     ierr = PetscFree(a->inew);CHKERRQ(ierr);
132     a->inew = 0;
133   }
134   ierr = PetscFree(a);CHKERRQ(ierr);
135 
136   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
137   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
138   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
139   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
140   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr);
141   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
142   PetscFunctionReturn(0);
143 }
144 
145 #undef __FUNCT__
146 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
147 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op)
148 {
149   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
150 
151   PetscFunctionBegin;
152   switch (op) {
153   case MAT_ROW_ORIENTED:
154     a->roworiented    = PETSC_TRUE;
155     break;
156   case MAT_COLUMN_ORIENTED:
157     a->roworiented    = PETSC_FALSE;
158     break;
159   case MAT_COLUMNS_SORTED:
160     a->sorted         = PETSC_TRUE;
161     break;
162   case MAT_COLUMNS_UNSORTED:
163     a->sorted         = PETSC_FALSE;
164     break;
165   case MAT_KEEP_ZEROED_ROWS:
166     a->keepzeroedrows = PETSC_TRUE;
167     break;
168   case MAT_NO_NEW_NONZERO_LOCATIONS:
169     a->nonew          = 1;
170     break;
171   case MAT_NEW_NONZERO_LOCATION_ERR:
172     a->nonew          = -1;
173     break;
174   case MAT_NEW_NONZERO_ALLOCATION_ERR:
175     a->nonew          = -2;
176     break;
177   case MAT_YES_NEW_NONZERO_LOCATIONS:
178     a->nonew          = 0;
179     break;
180   case MAT_ROWS_SORTED:
181   case MAT_ROWS_UNSORTED:
182   case MAT_YES_NEW_DIAGONALS:
183   case MAT_IGNORE_OFF_PROC_ENTRIES:
184   case MAT_USE_HASH_TABLE:
185     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
186     break;
187   case MAT_NO_NEW_DIAGONALS:
188     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
189   case MAT_NOT_SYMMETRIC:
190   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
191   case MAT_HERMITIAN:
192     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
193   case MAT_SYMMETRIC:
194   case MAT_STRUCTURALLY_SYMMETRIC:
195   case MAT_NOT_HERMITIAN:
196   case MAT_SYMMETRY_ETERNAL:
197   case MAT_NOT_SYMMETRY_ETERNAL:
198     break;
199   default:
200     SETERRQ(PETSC_ERR_SUP,"unknown option");
201   }
202   PetscFunctionReturn(0);
203 }
204 
205 #undef __FUNCT__
206 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
207 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
208 {
209   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
210   PetscErrorCode ierr;
211   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
212   MatScalar    *aa,*aa_i;
213   PetscScalar  *v_i;
214 
215   PetscFunctionBegin;
216   bs  = a->bs;
217   ai  = a->i;
218   aj  = a->j;
219   aa  = a->a;
220   bs2 = a->bs2;
221 
222   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row);
223 
224   bn  = row/bs;   /* Block number */
225   bp  = row % bs; /* Block position */
226   M   = ai[bn+1] - ai[bn];
227   *ncols = bs*M;
228 
229   if (v) {
230     *v = 0;
231     if (*ncols) {
232       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
233       for (i=0; i<M; i++) { /* for each block in the block row */
234         v_i  = *v + i*bs;
235         aa_i = aa + bs2*(ai[bn] + i);
236         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
237       }
238     }
239   }
240 
241   if (cols) {
242     *cols = 0;
243     if (*ncols) {
244       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
245       for (i=0; i<M; i++) { /* for each block in the block row */
246         cols_i = *cols + i*bs;
247         itmp  = bs*aj[ai[bn] + i];
248         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
249       }
250     }
251   }
252 
253   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
254   /* this segment is currently removed, so only entries in the upper triangle are obtained */
255 #ifdef column_search
256   v_i    = *v    + M*bs;
257   cols_i = *cols + M*bs;
258   for (i=0; i<bn; i++){ /* for each block row */
259     M = ai[i+1] - ai[i];
260     for (j=0; j<M; j++){
261       itmp = aj[ai[i] + j];    /* block column value */
262       if (itmp == bn){
263         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
264         for (k=0; k<bs; k++) {
265           *cols_i++ = i*bs+k;
266           *v_i++    = aa_i[k];
267         }
268         *ncols += bs;
269         break;
270       }
271     }
272   }
273 #endif
274 
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
280 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
281 {
282   PetscErrorCode ierr;
283 
284   PetscFunctionBegin;
285   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
286   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
287   PetscFunctionReturn(0);
288 }
289 
290 #undef __FUNCT__
291 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
292 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B)
293 {
294   PetscErrorCode ierr;
295   PetscFunctionBegin;
296   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
297   PetscFunctionReturn(0);
298 }
299 
300 #undef __FUNCT__
301 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
302 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
303 {
304   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
305   PetscErrorCode ierr;
306   int i,j,bs = a->bs,k,l,bs2=a->bs2;
307   char              *name;
308   PetscViewerFormat format;
309 
310   PetscFunctionBegin;
311   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
312   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
313   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
314     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
315   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
316     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
317   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
318     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
319     for (i=0; i<a->mbs; i++) {
320       for (j=0; j<bs; j++) {
321         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
322         for (k=a->i[i]; k<a->i[i+1]; k++) {
323           for (l=0; l<bs; l++) {
324 #if defined(PETSC_USE_COMPLEX)
325             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
326               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
327                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
328             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
329               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
330                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
331             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
332               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
333             }
334 #else
335             if (a->a[bs2*k + l*bs + j] != 0.0) {
336               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
337             }
338 #endif
339           }
340         }
341         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
342       }
343     }
344     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
345   } else {
346     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
347     for (i=0; i<a->mbs; i++) {
348       for (j=0; j<bs; j++) {
349         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
350         for (k=a->i[i]; k<a->i[i+1]; k++) {
351           for (l=0; l<bs; l++) {
352 #if defined(PETSC_USE_COMPLEX)
353             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
354               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
355                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
356             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
357               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
358                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
359             } else {
360               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
361             }
362 #else
363             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
364 #endif
365           }
366         }
367         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
368       }
369     }
370     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
371   }
372   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
373   PetscFunctionReturn(0);
374 }
375 
376 #undef __FUNCT__
377 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
378 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
379 {
380   Mat          A = (Mat) Aa;
381   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
382   PetscErrorCode ierr;
383   int          row,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
384   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
385   MatScalar    *aa;
386   MPI_Comm     comm;
387   PetscViewer  viewer;
388 
389   PetscFunctionBegin;
390   /*
391       This is nasty. If this is called from an originally parallel matrix
392    then all processes call this,but only the first has the matrix so the
393    rest should return immediately.
394   */
395   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
396   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
397   if (rank) PetscFunctionReturn(0);
398 
399   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
400 
401   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
402   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
403 
404   /* loop over matrix elements drawing boxes */
405   color = PETSC_DRAW_BLUE;
406   for (i=0,row=0; i<mbs; i++,row+=bs) {
407     for (j=a->i[i]; j<a->i[i+1]; j++) {
408       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
409       x_l = a->j[j]*bs; x_r = x_l + 1.0;
410       aa = a->a + j*bs2;
411       for (k=0; k<bs; k++) {
412         for (l=0; l<bs; l++) {
413           if (PetscRealPart(*aa++) >=  0.) continue;
414           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
415         }
416       }
417     }
418   }
419   color = PETSC_DRAW_CYAN;
420   for (i=0,row=0; i<mbs; i++,row+=bs) {
421     for (j=a->i[i]; j<a->i[i+1]; j++) {
422       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
423       x_l = a->j[j]*bs; x_r = x_l + 1.0;
424       aa = a->a + j*bs2;
425       for (k=0; k<bs; k++) {
426         for (l=0; l<bs; l++) {
427           if (PetscRealPart(*aa++) != 0.) continue;
428           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
429         }
430       }
431     }
432   }
433 
434   color = PETSC_DRAW_RED;
435   for (i=0,row=0; i<mbs; i++,row+=bs) {
436     for (j=a->i[i]; j<a->i[i+1]; j++) {
437       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
438       x_l = a->j[j]*bs; x_r = x_l + 1.0;
439       aa = a->a + j*bs2;
440       for (k=0; k<bs; k++) {
441         for (l=0; l<bs; l++) {
442           if (PetscRealPart(*aa++) <= 0.) continue;
443           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
444         }
445       }
446     }
447   }
448   PetscFunctionReturn(0);
449 }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
453 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
454 {
455   PetscErrorCode ierr;
456   PetscReal    xl,yl,xr,yr,w,h;
457   PetscDraw    draw;
458   PetscTruth   isnull;
459 
460   PetscFunctionBegin;
461 
462   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
463   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
464 
465   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
466   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
467   xr += w;    yr += h;  xl = -w;     yl = -h;
468   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
469   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
470   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatView_SeqSBAIJ"
476 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
477 {
478   PetscErrorCode ierr;
479   PetscTruth iascii,isdraw;
480 
481   PetscFunctionBegin;
482   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
483   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
484   if (iascii){
485     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
486   } else if (isdraw) {
487     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
488   } else {
489     Mat B;
490     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
491     ierr = MatView(B,viewer);CHKERRQ(ierr);
492     ierr = MatDestroy(B);CHKERRQ(ierr);
493   }
494   PetscFunctionReturn(0);
495 }
496 
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
500 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
501 {
502   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
503   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
504   int        *ai = a->i,*ailen = a->ilen;
505   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
506   MatScalar  *ap,*aa = a->a,zero = 0.0;
507 
508   PetscFunctionBegin;
509   for (k=0; k<m; k++) { /* loop over rows */
510     row  = im[k]; brow = row/bs;
511     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
512     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
513     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
514     nrow = ailen[brow];
515     for (l=0; l<n; l++) { /* loop over columns */
516       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]);
517       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
518       col  = in[l] ;
519       bcol = col/bs;
520       cidx = col%bs;
521       ridx = row%bs;
522       high = nrow;
523       low  = 0; /* assume unsorted */
524       while (high-low > 5) {
525         t = (low+high)/2;
526         if (rp[t] > bcol) high = t;
527         else             low  = t;
528       }
529       for (i=low; i<high; i++) {
530         if (rp[i] > bcol) break;
531         if (rp[i] == bcol) {
532           *v++ = ap[bs2*i+bs*cidx+ridx];
533           goto finished;
534         }
535       }
536       *v++ = zero;
537       finished:;
538     }
539   }
540   PetscFunctionReturn(0);
541 }
542 
543 
544 #undef __FUNCT__
545 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
546 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
547 {
548   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
549   PetscErrorCode ierr;
550   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
551   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
552   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval;
553   PetscTruth      roworiented=a->roworiented;
554   const MatScalar *value = v;
555   MatScalar       *ap,*aa = a->a,*bap;
556 
557   PetscFunctionBegin;
558   if (roworiented) {
559     stepval = (n-1)*bs;
560   } else {
561     stepval = (m-1)*bs;
562   }
563   for (k=0; k<m; k++) { /* loop over added rows */
564     row  = im[k];
565     if (row < 0) continue;
566 #if defined(PETSC_USE_BOPT_g)
567     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
568 #endif
569     rp   = aj + ai[row];
570     ap   = aa + bs2*ai[row];
571     rmax = imax[row];
572     nrow = ailen[row];
573     low  = 0;
574     for (l=0; l<n; l++) { /* loop over added columns */
575       if (in[l] < 0) continue;
576       col = in[l];
577 #if defined(PETSC_USE_BOPT_g)
578       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1);
579 #endif
580       if (col < row) continue; /* ignore lower triangular block */
581       if (roworiented) {
582         value = v + k*(stepval+bs)*bs + l*bs;
583       } else {
584         value = v + l*(stepval+bs)*bs + k*bs;
585       }
586       if (!sorted) low = 0; high = nrow;
587       while (high-low > 7) {
588         t = (low+high)/2;
589         if (rp[t] > col) high = t;
590         else             low  = t;
591       }
592       for (i=low; i<high; i++) {
593         if (rp[i] > col) break;
594         if (rp[i] == col) {
595           bap  = ap +  bs2*i;
596           if (roworiented) {
597             if (is == ADD_VALUES) {
598               for (ii=0; ii<bs; ii++,value+=stepval) {
599                 for (jj=ii; jj<bs2; jj+=bs) {
600                   bap[jj] += *value++;
601                 }
602               }
603             } else {
604               for (ii=0; ii<bs; ii++,value+=stepval) {
605                 for (jj=ii; jj<bs2; jj+=bs) {
606                   bap[jj] = *value++;
607                 }
608               }
609             }
610           } else {
611             if (is == ADD_VALUES) {
612               for (ii=0; ii<bs; ii++,value+=stepval) {
613                 for (jj=0; jj<bs; jj++) {
614                   *bap++ += *value++;
615                 }
616               }
617             } else {
618               for (ii=0; ii<bs; ii++,value+=stepval) {
619                 for (jj=0; jj<bs; jj++) {
620                   *bap++  = *value++;
621                 }
622               }
623             }
624           }
625           goto noinsert2;
626         }
627       }
628       if (nonew == 1) goto noinsert2;
629       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
630       if (nrow >= rmax) {
631         /* there is no extra room in row, therefore enlarge */
632         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
633         MatScalar *new_a;
634 
635         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
636 
637         /* malloc new storage space */
638         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
639 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
640         new_j   = (int*)(new_a + bs2*new_nz);
641         new_i   = new_j + new_nz;
642 
643         /* copy over old data into new slots */
644         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
645         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
646         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
647         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
648         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
649         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
650         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
651         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
652         /* free up old matrix storage */
653         ierr = PetscFree(a->a);CHKERRQ(ierr);
654         if (!a->singlemalloc) {
655           ierr = PetscFree(a->i);CHKERRQ(ierr);
656           ierr = PetscFree(a->j);CHKERRQ(ierr);
657         }
658         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
659         a->singlemalloc = PETSC_TRUE;
660 
661         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
662         rmax = imax[row] = imax[row] + CHUNKSIZE;
663         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
664         a->maxnz += bs2*CHUNKSIZE;
665         a->reallocs++;
666         a->nz++;
667       }
668       N = nrow++ - 1;
669       /* shift up all the later entries in this row */
670       for (ii=N; ii>=i; ii--) {
671         rp[ii+1] = rp[ii];
672         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
673       }
674       if (N >= i) {
675         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
676       }
677       rp[i] = col;
678       bap   = ap +  bs2*i;
679       if (roworiented) {
680         for (ii=0; ii<bs; ii++,value+=stepval) {
681           for (jj=ii; jj<bs2; jj+=bs) {
682             bap[jj] = *value++;
683           }
684         }
685       } else {
686         for (ii=0; ii<bs; ii++,value+=stepval) {
687           for (jj=0; jj<bs; jj++) {
688             *bap++  = *value++;
689           }
690         }
691       }
692       noinsert2:;
693       low = i;
694     }
695     ailen[row] = nrow;
696   }
697   PetscFunctionReturn(0);
698 }
699 
700 #undef __FUNCT__
701 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
702 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
703 {
704   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
705   PetscErrorCode ierr;
706   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
707   int        m = A->m,*ip,N,*ailen = a->ilen;
708   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0;
709   MatScalar  *aa = a->a,*ap;
710 
711   PetscFunctionBegin;
712   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
713 
714   if (m) rmax = ailen[0];
715   for (i=1; i<mbs; i++) {
716     /* move each row back by the amount of empty slots (fshift) before it*/
717     fshift += imax[i-1] - ailen[i-1];
718     rmax   = PetscMax(rmax,ailen[i]);
719     if (fshift) {
720       ip = aj + ai[i]; ap = aa + bs2*ai[i];
721       N = ailen[i];
722       for (j=0; j<N; j++) {
723         ip[j-fshift] = ip[j];
724         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
725       }
726     }
727     ai[i] = ai[i-1] + ailen[i-1];
728   }
729   if (mbs) {
730     fshift += imax[mbs-1] - ailen[mbs-1];
731     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
732   }
733   /* reset ilen and imax for each row */
734   for (i=0; i<mbs; i++) {
735     ailen[i] = imax[i] = ai[i+1] - ai[i];
736   }
737   a->nz = ai[mbs];
738 
739   /* diagonals may have moved, reset it */
740   if (a->diag) {
741     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
742   }
743   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
744            m,A->m,a->bs,fshift*bs2,a->nz*bs2);
745   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
746            a->reallocs);
747   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
748   a->reallocs          = 0;
749   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
750 
751   PetscFunctionReturn(0);
752 }
753 
754 /*
755    This function returns an array of flags which indicate the locations of contiguous
756    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
757    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
758    Assume: sizes should be long enough to hold all the values.
759 */
760 #undef __FUNCT__
761 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
762 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
763 {
764   int        i,j,k,row;
765   PetscTruth flg;
766 
767   PetscFunctionBegin;
768   for (i=0,j=0; i<n; j++) {
769     row = idx[i];
770     if (row%bs!=0) { /* Not the begining of a block */
771       sizes[j] = 1;
772       i++;
773     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
774       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
775       i++;
776     } else { /* Begining of the block, so check if the complete block exists */
777       flg = PETSC_TRUE;
778       for (k=1; k<bs; k++) {
779         if (row+k != idx[i+k]) { /* break in the block */
780           flg = PETSC_FALSE;
781           break;
782         }
783       }
784       if (flg == PETSC_TRUE) { /* No break in the bs */
785         sizes[j] = bs;
786         i+= bs;
787       } else {
788         sizes[j] = 1;
789         i++;
790       }
791     }
792   }
793   *bs_max = j;
794   PetscFunctionReturn(0);
795 }
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
799 PetscErrorCode MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
800 {
801   PetscFunctionBegin;
802   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
803 }
804 
805 /* Only add/insert a(i,j) with i<=j (blocks).
806    Any a(i,j) with i>j input by user is ingored.
807 */
808 
809 #undef __FUNCT__
810 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
811 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
812 {
813   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
814   PetscErrorCode ierr;
815   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
816   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
817   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
818   int         ridx,cidx,bs2=a->bs2;
819   MatScalar   *ap,value,*aa=a->a,*bap;
820 
821   PetscFunctionBegin;
822 
823   for (k=0; k<m; k++) { /* loop over added rows */
824     row  = im[k];       /* row number */
825     brow = row/bs;      /* block row number */
826     if (row < 0) continue;
827 #if defined(PETSC_USE_BOPT_g)
828     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
829 #endif
830     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
831     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
832     rmax = imax[brow];  /* maximum space allocated for this row */
833     nrow = ailen[brow]; /* actual length of this row */
834     low  = 0;
835 
836     for (l=0; l<n; l++) { /* loop over added columns */
837       if (in[l] < 0) continue;
838 #if defined(PETSC_USE_BOPT_g)
839       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1);
840 #endif
841       col = in[l];
842       bcol = col/bs;              /* block col number */
843 
844       if (brow <= bcol){
845         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
846         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
847           /* element value a(k,l) */
848           if (roworiented) {
849             value = v[l + k*n];
850           } else {
851             value = v[k + l*m];
852           }
853 
854           /* move pointer bap to a(k,l) quickly and add/insert value */
855           if (!sorted) low = 0; high = nrow;
856           while (high-low > 7) {
857             t = (low+high)/2;
858             if (rp[t] > bcol) high = t;
859             else              low  = t;
860           }
861           for (i=low; i<high; i++) {
862             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
863             if (rp[i] > bcol) break;
864             if (rp[i] == bcol) {
865               bap  = ap +  bs2*i + bs*cidx + ridx;
866               if (is == ADD_VALUES) *bap += value;
867               else                  *bap  = value;
868               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
869               if (brow == bcol && ridx < cidx){
870                 bap  = ap +  bs2*i + bs*ridx + cidx;
871                 if (is == ADD_VALUES) *bap += value;
872                 else                  *bap  = value;
873               }
874               goto noinsert1;
875             }
876           }
877 
878       if (nonew == 1) goto noinsert1;
879       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
880       if (nrow >= rmax) {
881         /* there is no extra room in row, therefore enlarge */
882         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
883         MatScalar *new_a;
884 
885         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
886 
887         /* Malloc new storage space */
888         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
889         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
890         new_j = (int*)(new_a + bs2*new_nz);
891         new_i = new_j + new_nz;
892 
893         /* copy over old data into new slots */
894         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
895         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
896         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
897         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
898         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
899         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
900         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
901         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
902         /* free up old matrix storage */
903         ierr = PetscFree(a->a);CHKERRQ(ierr);
904         if (!a->singlemalloc) {
905           ierr = PetscFree(a->i);CHKERRQ(ierr);
906           ierr = PetscFree(a->j);CHKERRQ(ierr);
907         }
908         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
909         a->singlemalloc = PETSC_TRUE;
910 
911         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
912         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
913         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
914         a->maxnz += bs2*CHUNKSIZE;
915         a->reallocs++;
916         a->nz++;
917       }
918 
919       N = nrow++ - 1;
920       /* shift up all the later entries in this row */
921       for (ii=N; ii>=i; ii--) {
922         rp[ii+1] = rp[ii];
923         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
924       }
925       if (N>=i) {
926         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
927       }
928       rp[i]                      = bcol;
929       ap[bs2*i + bs*cidx + ridx] = value;
930       noinsert1:;
931       low = i;
932       /* } */
933         }
934       } /* end of if .. if..  */
935     }                     /* end of loop over added columns */
936     ailen[brow] = nrow;
937   }                       /* end of loop over added rows */
938 
939   PetscFunctionReturn(0);
940 }
941 
942 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
943 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
944 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int);
945 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
946 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
947 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat);
948 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
949 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
950 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec);
951 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
952 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
953 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat);
954 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec);
955 
956 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
959 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
960 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
961 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
962 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
963 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
964 
965 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
966 
967 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
968 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
969 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
970 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
971 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
972 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
973 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
974 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
975 
976 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
977 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
978 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
979 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
980 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
981 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
982 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
983 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
984 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
985 
986 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
987 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
988 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
989 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
990 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
991 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
992 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
993 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
994 
995 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
996 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
997 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
998 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
999 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1000 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1001 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1002 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1003 
1004 #ifdef HAVE_MatICCFactor
1005 /* modified from MatILUFactor_SeqSBAIJ, needs further work!  */
1006 #undef __FUNCT__
1007 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1008 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1009 {
1010   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1011   Mat         outA;
1012   PetscErrorCode ierr;
1013   PetscTruth  row_identity,col_identity;
1014 
1015   PetscFunctionBegin;
1016   outA          = inA;
1017   inA->factor   = FACTOR_CHOLESKY;
1018 
1019   if (!a->diag) {
1020     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1021   }
1022   /*
1023       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1024       for ILU(0) factorization with natural ordering
1025   */
1026   switch (a->bs) {
1027   case 1:
1028     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1029     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1030     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1031     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1032   case 2:
1033     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1034     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1035     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1036     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1037     break;
1038   case 3:
1039     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1040     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1041     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1042     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1043     break;
1044   case 4:
1045     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1046     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1047     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1048     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1049     break;
1050   case 5:
1051     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1052     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1053     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1054     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1055     break;
1056   case 6:
1057     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1058     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1059     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1060     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1061     break;
1062   case 7:
1063     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1064     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1065     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1066     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1067     break;
1068   default:
1069     a->row        = row;
1070     a->icol       = col;
1071     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1072     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1073 
1074     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1075     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1076     PetscLogObjectParent(inA,a->icol);
1077 
1078     if (!a->solve_work) {
1079       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1080       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1081     }
1082   }
1083 
1084   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1085 
1086   PetscFunctionReturn(0);
1087 }
1088 #endif
1089 
1090 #undef __FUNCT__
1091 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1092 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A)
1093 {
1094   static PetscTruth called = PETSC_FALSE;
1095   MPI_Comm          comm = A->comm;
1096   PetscErrorCode ierr;
1097 
1098   PetscFunctionBegin;
1099   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1100   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1101   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 EXTERN_C_BEGIN
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1108 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1109 {
1110   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1111   int         i,nz,n;
1112 
1113   PetscFunctionBegin;
1114   nz = baij->maxnz;
1115   n  = mat->n;
1116   for (i=0; i<nz; i++) {
1117     baij->j[i] = indices[i];
1118   }
1119   baij->nz = nz;
1120   for (i=0; i<n; i++) {
1121     baij->ilen[i] = baij->imax[i];
1122   }
1123 
1124   PetscFunctionReturn(0);
1125 }
1126 EXTERN_C_END
1127 
1128 #undef __FUNCT__
1129 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1130 /*@
1131     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1132        in the matrix.
1133 
1134   Input Parameters:
1135 +  mat     - the SeqSBAIJ matrix
1136 -  indices - the column indices
1137 
1138   Level: advanced
1139 
1140   Notes:
1141     This can be called if you have precomputed the nonzero structure of the
1142   matrix and want to provide it to the matrix object to improve the performance
1143   of the MatSetValues() operation.
1144 
1145     You MUST have set the correct numbers of nonzeros per row in the call to
1146   MatCreateSeqSBAIJ().
1147 
1148     MUST be called before any calls to MatSetValues()
1149 
1150 .seealso: MatCreateSeqSBAIJ
1151 @*/
1152 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1153 {
1154   PetscErrorCode ierr,(*f)(Mat,int *);
1155 
1156   PetscFunctionBegin;
1157   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1158   PetscValidPointer(indices,2);
1159   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1160   if (f) {
1161     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1162   } else {
1163     SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices");
1164   }
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 #undef __FUNCT__
1169 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1170 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1171 {
1172   PetscErrorCode ierr;
1173 
1174   PetscFunctionBegin;
1175   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 #undef __FUNCT__
1180 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1181 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1182 {
1183   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1184   PetscFunctionBegin;
1185   *array = a->a;
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 #undef __FUNCT__
1190 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1191 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1192 {
1193   PetscFunctionBegin;
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #include "petscblaslapack.h"
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1200 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1201 {
1202   Mat_SeqSBAIJ   *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1203   PetscErrorCode ierr;
1204   int            i,bs=y->bs,bs2,j;
1205   PetscBLASInt   bnz = (PetscBLASInt)x->nz,one = 1;
1206 
1207   PetscFunctionBegin;
1208   if (str == SAME_NONZERO_PATTERN) {
1209     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1210   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1211     if (y->xtoy && y->XtoY != X) {
1212       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1213       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1214     }
1215     if (!y->xtoy) { /* get xtoy */
1216       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1217       y->XtoY = X;
1218     }
1219     bs2 = bs*bs;
1220     for (i=0; i<x->nz; i++) {
1221       j = 0;
1222       while (j < bs2){
1223         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1224         j++;
1225       }
1226     }
1227     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1228   } else {
1229     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1230   }
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1236 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1237 {
1238   PetscFunctionBegin;
1239   *flg = PETSC_TRUE;
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 #undef __FUNCT__
1244 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1245 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1246 {
1247   PetscFunctionBegin;
1248   *flg = PETSC_TRUE;
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1254 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1255 {
1256   PetscFunctionBegin;
1257   *flg = PETSC_FALSE;
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 /* -------------------------------------------------------------------*/
1262 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1263        MatGetRow_SeqSBAIJ,
1264        MatRestoreRow_SeqSBAIJ,
1265        MatMult_SeqSBAIJ_N,
1266 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1267        MatMult_SeqSBAIJ_N,
1268        MatMultAdd_SeqSBAIJ_N,
1269        MatSolve_SeqSBAIJ_N,
1270        0,
1271        0,
1272 /*10*/ 0,
1273        0,
1274        MatCholeskyFactor_SeqSBAIJ,
1275        MatRelax_SeqSBAIJ,
1276        MatTranspose_SeqSBAIJ,
1277 /*15*/ MatGetInfo_SeqSBAIJ,
1278        MatEqual_SeqSBAIJ,
1279        MatGetDiagonal_SeqSBAIJ,
1280        MatDiagonalScale_SeqSBAIJ,
1281        MatNorm_SeqSBAIJ,
1282 /*20*/ 0,
1283        MatAssemblyEnd_SeqSBAIJ,
1284        0,
1285        MatSetOption_SeqSBAIJ,
1286        MatZeroEntries_SeqSBAIJ,
1287 /*25*/ MatZeroRows_SeqSBAIJ,
1288        0,
1289        0,
1290        MatCholeskyFactorSymbolic_SeqSBAIJ,
1291        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1292 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1293        0,
1294        MatICCFactorSymbolic_SeqSBAIJ,
1295        MatGetArray_SeqSBAIJ,
1296        MatRestoreArray_SeqSBAIJ,
1297 /*35*/ MatDuplicate_SeqSBAIJ,
1298        0,
1299        0,
1300        0,
1301        0,
1302 /*40*/ MatAXPY_SeqSBAIJ,
1303        MatGetSubMatrices_SeqSBAIJ,
1304        MatIncreaseOverlap_SeqSBAIJ,
1305        MatGetValues_SeqSBAIJ,
1306        0,
1307 /*45*/ MatPrintHelp_SeqSBAIJ,
1308        MatScale_SeqSBAIJ,
1309        0,
1310        0,
1311        0,
1312 /*50*/ MatGetBlockSize_SeqSBAIJ,
1313        MatGetRowIJ_SeqSBAIJ,
1314        MatRestoreRowIJ_SeqSBAIJ,
1315        0,
1316        0,
1317 /*55*/ 0,
1318        0,
1319        0,
1320        0,
1321        MatSetValuesBlocked_SeqSBAIJ,
1322 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1323        0,
1324        0,
1325        MatGetPetscMaps_Petsc,
1326        0,
1327 /*65*/ 0,
1328        0,
1329        0,
1330        0,
1331        0,
1332 /*70*/ MatGetRowMax_SeqSBAIJ,
1333        0,
1334        0,
1335        0,
1336        0,
1337 /*75*/ 0,
1338        0,
1339        0,
1340        0,
1341        0,
1342 /*80*/ 0,
1343        0,
1344        0,
1345 #if !defined(PETSC_USE_COMPLEX)
1346        MatGetInertia_SeqSBAIJ,
1347 #else
1348        0,
1349 #endif
1350        MatLoad_SeqSBAIJ,
1351 /*85*/ MatIsSymmetric_SeqSBAIJ,
1352        MatIsHermitian_SeqSBAIJ,
1353        MatIsStructurallySymmetric_SeqSBAIJ,
1354        0,
1355        0,
1356 /*90*/ 0,
1357        0,
1358        0,
1359        0,
1360        0,
1361 /*95*/ 0,
1362        0,
1363        0,
1364        0};
1365 
1366 EXTERN_C_BEGIN
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1369 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat)
1370 {
1371   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1372   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1373   PetscErrorCode ierr;
1374 
1375   PetscFunctionBegin;
1376   if (aij->nonew != 1) {
1377     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1378   }
1379 
1380   /* allocate space for values if not already there */
1381   if (!aij->saved_values) {
1382     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1383   }
1384 
1385   /* copy values over */
1386   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1387   PetscFunctionReturn(0);
1388 }
1389 EXTERN_C_END
1390 
1391 EXTERN_C_BEGIN
1392 #undef __FUNCT__
1393 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1394 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat)
1395 {
1396   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1397   PetscErrorCode ierr;
1398   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1399 
1400   PetscFunctionBegin;
1401   if (aij->nonew != 1) {
1402     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1403   }
1404   if (!aij->saved_values) {
1405     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1406   }
1407 
1408   /* copy values over */
1409   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1410   PetscFunctionReturn(0);
1411 }
1412 EXTERN_C_END
1413 
1414 EXTERN_C_BEGIN
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1417 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1418 {
1419   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1420   PetscErrorCode ierr;
1421   int          i,len,mbs,bs2;
1422   PetscTruth   flg;
1423 
1424   PetscFunctionBegin;
1425   B->preallocated = PETSC_TRUE;
1426   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1427   mbs  = B->m/bs;
1428   bs2  = bs*bs;
1429 
1430   if (mbs*bs != B->m) {
1431     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1432   }
1433 
1434   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1435   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1436   if (nnz) {
1437     for (i=0; i<mbs; i++) {
1438       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1439       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1440     }
1441   }
1442 
1443   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1444   if (!flg) {
1445     switch (bs) {
1446     case 1:
1447       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1448       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1449       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1450       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1451       B->ops->mult            = MatMult_SeqSBAIJ_1;
1452       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1453       break;
1454     case 2:
1455       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1456       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1457       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_2;
1458       B->ops->mult            = MatMult_SeqSBAIJ_2;
1459       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1460       break;
1461     case 3:
1462       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1463       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1464       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_3;
1465       B->ops->mult            = MatMult_SeqSBAIJ_3;
1466       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1467       break;
1468     case 4:
1469       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1470       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1471       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_4;
1472       B->ops->mult            = MatMult_SeqSBAIJ_4;
1473       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1474       break;
1475     case 5:
1476       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1477       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1478       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_5;
1479       B->ops->mult            = MatMult_SeqSBAIJ_5;
1480       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1481       break;
1482     case 6:
1483       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1484       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1485       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_6;
1486       B->ops->mult            = MatMult_SeqSBAIJ_6;
1487       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1488       break;
1489     case 7:
1490       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1491       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1492       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_7;
1493       B->ops->mult            = MatMult_SeqSBAIJ_7;
1494       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1495       break;
1496     }
1497   }
1498 
1499   b->mbs = mbs;
1500   b->nbs = mbs;
1501   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1502   if (!nnz) {
1503     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1504     else if (nz <= 0)        nz = 1;
1505     for (i=0; i<mbs; i++) {
1506       b->imax[i] = nz;
1507     }
1508     nz = nz*mbs; /* total nz */
1509   } else {
1510     nz = 0;
1511     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1512   }
1513   /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1514 
1515   /* allocate the matrix space */
1516   len  = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1517   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1518   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1519   b->j = (int*)(b->a + nz*bs2);
1520   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1521   b->i = b->j + nz;
1522   b->singlemalloc = PETSC_TRUE;
1523 
1524   /* pointer to beginning of each row */
1525   b->i[0] = 0;
1526   for (i=1; i<mbs+1; i++) {
1527     b->i[i] = b->i[i-1] + b->imax[i-1];
1528   }
1529 
1530   /* b->ilen will count nonzeros in each block row so far. */
1531   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1532   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1533   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1534 
1535   b->bs               = bs;
1536   b->bs2              = bs2;
1537   b->nz             = 0;
1538   b->maxnz          = nz*bs2;
1539 
1540   b->inew             = 0;
1541   b->jnew             = 0;
1542   b->anew             = 0;
1543   b->a2anew           = 0;
1544   b->permute          = PETSC_FALSE;
1545   PetscFunctionReturn(0);
1546 }
1547 EXTERN_C_END
1548 
1549 EXTERN_C_BEGIN
1550 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*);
1551 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*);
1552 EXTERN_C_END
1553 
1554 /*MC
1555    MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1556    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1557 
1558    Options Database Keys:
1559 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1560 
1561   Level: beginner
1562 
1563 .seealso: MatCreateSeqSBAIJ
1564 M*/
1565 
1566 EXTERN_C_BEGIN
1567 #undef __FUNCT__
1568 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1569 PetscErrorCode MatCreate_SeqSBAIJ(Mat B)
1570 {
1571   Mat_SeqSBAIJ *b;
1572   PetscErrorCode ierr;
1573   int size;
1574 
1575   PetscFunctionBegin;
1576   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1577   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1578   B->m = B->M = PetscMax(B->m,B->M);
1579   B->n = B->N = PetscMax(B->n,B->N);
1580 
1581   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1582   B->data = (void*)b;
1583   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1584   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1585   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1586   B->ops->view        = MatView_SeqSBAIJ;
1587   B->factor           = 0;
1588   B->lupivotthreshold = 1.0;
1589   B->mapping          = 0;
1590   b->row              = 0;
1591   b->icol             = 0;
1592   b->reallocs         = 0;
1593   b->saved_values     = 0;
1594 
1595   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1596   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1597 
1598   b->sorted           = PETSC_FALSE;
1599   b->roworiented      = PETSC_TRUE;
1600   b->nonew            = 0;
1601   b->diag             = 0;
1602   b->solve_work       = 0;
1603   b->mult_work        = 0;
1604   B->spptr            = 0;
1605   b->keepzeroedrows   = PETSC_FALSE;
1606   b->xtoy             = 0;
1607   b->XtoY             = 0;
1608 
1609   b->inew             = 0;
1610   b->jnew             = 0;
1611   b->anew             = 0;
1612   b->a2anew           = 0;
1613   b->permute          = PETSC_FALSE;
1614 
1615   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1616                                      "MatStoreValues_SeqSBAIJ",
1617                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1618   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1619                                      "MatRetrieveValues_SeqSBAIJ",
1620                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1621   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1622                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1623                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1624   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1625                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1626                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1627   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1628                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1629                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1630   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1631                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1632                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1633 
1634   B->symmetric                  = PETSC_TRUE;
1635   B->structurally_symmetric     = PETSC_TRUE;
1636   B->symmetric_set              = PETSC_TRUE;
1637   B->structurally_symmetric_set = PETSC_TRUE;
1638   PetscFunctionReturn(0);
1639 }
1640 EXTERN_C_END
1641 
1642 #undef __FUNCT__
1643 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1644 /*@C
1645    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1646    compressed row) format.  For good matrix assembly performance the
1647    user should preallocate the matrix storage by setting the parameter nz
1648    (or the array nnz).  By setting these parameters accurately, performance
1649    during matrix assembly can be increased by more than a factor of 50.
1650 
1651    Collective on Mat
1652 
1653    Input Parameters:
1654 +  A - the symmetric matrix
1655 .  bs - size of block
1656 .  nz - number of block nonzeros per block row (same for all rows)
1657 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1658          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1659 
1660    Options Database Keys:
1661 .   -mat_no_unroll - uses code that does not unroll the loops in the
1662                      block calculations (much slower)
1663 .    -mat_block_size - size of the blocks to use
1664 
1665    Level: intermediate
1666 
1667    Notes:
1668    Specify the preallocated storage with either nz or nnz (not both).
1669    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1670    allocation.  For additional details, see the users manual chapter on
1671    matrices.
1672 
1673 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1674 @*/
1675 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1676   PetscErrorCode ierr,(*f)(Mat,int,int,const int[]);
1677 
1678   PetscFunctionBegin;
1679   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1680   if (f) {
1681     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1682   }
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatCreateSeqSBAIJ"
1688 /*@C
1689    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1690    compressed row) format.  For good matrix assembly performance the
1691    user should preallocate the matrix storage by setting the parameter nz
1692    (or the array nnz).  By setting these parameters accurately, performance
1693    during matrix assembly can be increased by more than a factor of 50.
1694 
1695    Collective on MPI_Comm
1696 
1697    Input Parameters:
1698 +  comm - MPI communicator, set to PETSC_COMM_SELF
1699 .  bs - size of block
1700 .  m - number of rows, or number of columns
1701 .  nz - number of block nonzeros per block row (same for all rows)
1702 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1703          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1704 
1705    Output Parameter:
1706 .  A - the symmetric matrix
1707 
1708    Options Database Keys:
1709 .   -mat_no_unroll - uses code that does not unroll the loops in the
1710                      block calculations (much slower)
1711 .    -mat_block_size - size of the blocks to use
1712 
1713    Level: intermediate
1714 
1715    Notes:
1716 
1717    Specify the preallocated storage with either nz or nnz (not both).
1718    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1719    allocation.  For additional details, see the users manual chapter on
1720    matrices.
1721 
1722 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1723 @*/
1724 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1725 {
1726   PetscErrorCode ierr;
1727 
1728   PetscFunctionBegin;
1729   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1730   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1731   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1732   PetscFunctionReturn(0);
1733 }
1734 
1735 #undef __FUNCT__
1736 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1737 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1738 {
1739   Mat          C;
1740   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1741   PetscErrorCode ierr;
1742   int          i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1743 
1744   PetscFunctionBegin;
1745   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1746 
1747   *B = 0;
1748   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1749   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1750   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1751   c    = (Mat_SeqSBAIJ*)C->data;
1752 
1753   C->preallocated   = PETSC_TRUE;
1754   C->factor         = A->factor;
1755   c->row            = 0;
1756   c->icol           = 0;
1757   c->saved_values   = 0;
1758   c->keepzeroedrows = a->keepzeroedrows;
1759   C->assembled      = PETSC_TRUE;
1760 
1761   C->M    = A->M;
1762   C->N    = A->N;
1763   c->bs   = a->bs;
1764   c->bs2  = a->bs2;
1765   c->mbs  = a->mbs;
1766   c->nbs  = a->nbs;
1767 
1768   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1769   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1770   for (i=0; i<mbs; i++) {
1771     c->imax[i] = a->imax[i];
1772     c->ilen[i] = a->ilen[i];
1773   }
1774 
1775   /* allocate the matrix space */
1776   c->singlemalloc = PETSC_TRUE;
1777   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1778   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1779   c->j = (int*)(c->a + nz*bs2);
1780   c->i = c->j + nz;
1781   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1782   if (mbs > 0) {
1783     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1784     if (cpvalues == MAT_COPY_VALUES) {
1785       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1786     } else {
1787       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1788     }
1789   }
1790 
1791   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1792   c->sorted      = a->sorted;
1793   c->roworiented = a->roworiented;
1794   c->nonew       = a->nonew;
1795 
1796   if (a->diag) {
1797     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1798     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1799     for (i=0; i<mbs; i++) {
1800       c->diag[i] = a->diag[i];
1801     }
1802   } else c->diag        = 0;
1803   c->nz               = a->nz;
1804   c->maxnz            = a->maxnz;
1805   c->solve_work         = 0;
1806   c->mult_work          = 0;
1807   *B = C;
1808   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 #undef __FUNCT__
1813 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1814 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1815 {
1816   Mat_SeqSBAIJ *a;
1817   Mat          B;
1818   PetscErrorCode ierr;
1819   int          i,nz,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1820   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1821   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1822   int          *masked,nmask,tmp,bs2,ishift;
1823   PetscScalar  *aa;
1824   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1825 
1826   PetscFunctionBegin;
1827   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1828   bs2  = bs*bs;
1829 
1830   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1831   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1832   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1833   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1834   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1835   M = header[1]; N = header[2]; nz = header[3];
1836 
1837   if (header[3] < 0) {
1838     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1839   }
1840 
1841   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1842 
1843   /*
1844      This code adds extra rows to make sure the number of rows is
1845     divisible by the blocksize
1846   */
1847   mbs        = M/bs;
1848   extra_rows = bs - M + bs*(mbs);
1849   if (extra_rows == bs) extra_rows = 0;
1850   else                  mbs++;
1851   if (extra_rows) {
1852     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1853   }
1854 
1855   /* read in row lengths */
1856   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1857   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1858   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1859 
1860   /* read in column indices */
1861   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1862   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1863   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1864 
1865   /* loop over row lengths determining block row lengths */
1866   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1867   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1868   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1869   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1870   masked   = mask + mbs;
1871   rowcount = 0; nzcount = 0;
1872   for (i=0; i<mbs; i++) {
1873     nmask = 0;
1874     for (j=0; j<bs; j++) {
1875       kmax = rowlengths[rowcount];
1876       for (k=0; k<kmax; k++) {
1877         tmp = jj[nzcount++]/bs;   /* block col. index */
1878         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1879       }
1880       rowcount++;
1881     }
1882     s_browlengths[i] += nmask;
1883 
1884     /* zero out the mask elements we set */
1885     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1886   }
1887 
1888   /* create our matrix */
1889   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1890   ierr = MatSetType(B,type);CHKERRQ(ierr);
1891   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1892   a = (Mat_SeqSBAIJ*)B->data;
1893 
1894   /* set matrix "i" values */
1895   a->i[0] = 0;
1896   for (i=1; i<= mbs; i++) {
1897     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1898     a->ilen[i-1] = s_browlengths[i-1];
1899   }
1900   a->nz = a->i[mbs];
1901 
1902   /* read in nonzero values */
1903   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1904   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1905   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1906 
1907   /* set "a" and "j" values into matrix */
1908   nzcount = 0; jcount = 0;
1909   for (i=0; i<mbs; i++) {
1910     nzcountb = nzcount;
1911     nmask    = 0;
1912     for (j=0; j<bs; j++) {
1913       kmax = rowlengths[i*bs+j];
1914       for (k=0; k<kmax; k++) {
1915         tmp = jj[nzcount++]/bs; /* block col. index */
1916         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1917       }
1918     }
1919     /* sort the masked values */
1920     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1921 
1922     /* set "j" values into matrix */
1923     maskcount = 1;
1924     for (j=0; j<nmask; j++) {
1925       a->j[jcount++]  = masked[j];
1926       mask[masked[j]] = maskcount++;
1927     }
1928 
1929     /* set "a" values into matrix */
1930     ishift = bs2*a->i[i];
1931     for (j=0; j<bs; j++) {
1932       kmax = rowlengths[i*bs+j];
1933       for (k=0; k<kmax; k++) {
1934         tmp       = jj[nzcountb]/bs ; /* block col. index */
1935         if (tmp >= i){
1936           block     = mask[tmp] - 1;
1937           point     = jj[nzcountb] - bs*tmp;
1938           idx       = ishift + bs2*block + j + bs*point;
1939           a->a[idx] = aa[nzcountb];
1940         }
1941         nzcountb++;
1942       }
1943     }
1944     /* zero out the mask elements we set */
1945     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1946   }
1947   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1948 
1949   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1950   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1951   ierr = PetscFree(aa);CHKERRQ(ierr);
1952   ierr = PetscFree(jj);CHKERRQ(ierr);
1953   ierr = PetscFree(mask);CHKERRQ(ierr);
1954 
1955   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1956   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1957   ierr = MatView_Private(B);CHKERRQ(ierr);
1958   *A = B;
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1964 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1965 {
1966   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1967   MatScalar    *aa=a->a,*v,*v1;
1968   PetscScalar  *x,*b,*t,sum,d;
1969   PetscErrorCode ierr;
1970   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j;
1971   int          nz,nz1,*vj,*vj1,i;
1972 
1973   PetscFunctionBegin;
1974   its = its*lits;
1975   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1976 
1977   if (bs > 1)
1978     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1979 
1980   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1981   if (xx != bb) {
1982     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1983   } else {
1984     b = x;
1985   }
1986 
1987   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1988 
1989   if (flag & SOR_ZERO_INITIAL_GUESS) {
1990     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1991       for (i=0; i<m; i++)
1992         t[i] = b[i];
1993 
1994       for (i=0; i<m; i++){
1995         d  = *(aa + ai[i]);  /* diag[i] */
1996         v  = aa + ai[i] + 1;
1997         vj = aj + ai[i] + 1;
1998         nz = ai[i+1] - ai[i] - 1;
1999         x[i] = omega*t[i]/d;
2000         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2001         PetscLogFlops(2*nz-1);
2002       }
2003     }
2004 
2005     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2006       for (i=0; i<m; i++)
2007         t[i] = b[i];
2008 
2009       for (i=0; i<m-1; i++){  /* update rhs */
2010         v  = aa + ai[i] + 1;
2011         vj = aj + ai[i] + 1;
2012         nz = ai[i+1] - ai[i] - 1;
2013         while (nz--) t[*vj++] -= x[i]*(*v++);
2014         PetscLogFlops(2*nz-1);
2015       }
2016       for (i=m-1; i>=0; i--){
2017         d  = *(aa + ai[i]);
2018         v  = aa + ai[i] + 1;
2019         vj = aj + ai[i] + 1;
2020         nz = ai[i+1] - ai[i] - 1;
2021         sum = t[i];
2022         while (nz--) sum -= x[*vj++]*(*v++);
2023         PetscLogFlops(2*nz-1);
2024         x[i] =   (1-omega)*x[i] + omega*sum/d;
2025       }
2026     }
2027     its--;
2028   }
2029 
2030   while (its--) {
2031     /*
2032        forward sweep:
2033        for i=0,...,m-1:
2034          sum[i] = (b[i] - U(i,:)x )/d[i];
2035          x[i]   = (1-omega)x[i] + omega*sum[i];
2036          b      = b - x[i]*U^T(i,:);
2037 
2038     */
2039     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2040       for (i=0; i<m; i++)
2041         t[i] = b[i];
2042 
2043       for (i=0; i<m; i++){
2044         d  = *(aa + ai[i]);  /* diag[i] */
2045         v  = aa + ai[i] + 1; v1=v;
2046         vj = aj + ai[i] + 1; vj1=vj;
2047         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2048         sum = t[i];
2049         while (nz1--) sum -= (*v1++)*x[*vj1++];
2050         x[i] = (1-omega)*x[i] + omega*sum/d;
2051         while (nz--) t[*vj++] -= x[i]*(*v++);
2052         PetscLogFlops(4*nz-2);
2053       }
2054     }
2055 
2056   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2057       /*
2058        backward sweep:
2059        b = b - x[i]*U^T(i,:), i=0,...,n-2
2060        for i=m-1,...,0:
2061          sum[i] = (b[i] - U(i,:)x )/d[i];
2062          x[i]   = (1-omega)x[i] + omega*sum[i];
2063       */
2064       for (i=0; i<m; i++)
2065         t[i] = b[i];
2066 
2067       for (i=0; i<m-1; i++){  /* update rhs */
2068         v  = aa + ai[i] + 1;
2069         vj = aj + ai[i] + 1;
2070         nz = ai[i+1] - ai[i] - 1;
2071         while (nz--) t[*vj++] -= x[i]*(*v++);
2072         PetscLogFlops(2*nz-1);
2073       }
2074       for (i=m-1; i>=0; i--){
2075         d  = *(aa + ai[i]);
2076         v  = aa + ai[i] + 1;
2077         vj = aj + ai[i] + 1;
2078         nz = ai[i+1] - ai[i] - 1;
2079         sum = t[i];
2080         while (nz--) sum -= x[*vj++]*(*v++);
2081         PetscLogFlops(2*nz-1);
2082         x[i] =   (1-omega)*x[i] + omega*sum/d;
2083       }
2084     }
2085   }
2086 
2087   ierr = PetscFree(t);CHKERRQ(ierr);
2088   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2089   if (bb != xx) {
2090     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2091   }
2092 
2093   PetscFunctionReturn(0);
2094 }
2095 
2096 
2097 
2098 
2099 
2100 
2101