xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 6ab11f9b6ca3a3a4762ba278e5cdc46bee711a5b)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/inline/spops.h"
9 #include "src/mat/impls/sbaij/seq/sbaij.h"
10 
11 #define CHUNKSIZE  10
12 
13 /*
14      Checks for missing diagonals
15 */
16 #undef __FUNCT__
17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A)
19 {
20   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       *diag,*jj = a->j,i;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
38   PetscErrorCode ierr;
39   PetscInt       i,mbs = a->mbs;
40 
41   PetscFunctionBegin;
42   if (a->diag) PetscFunctionReturn(0);
43 
44   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
45   ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
46   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
47   PetscFunctionReturn(0);
48 }
49 
50 #undef __FUNCT__
51 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
52 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
53 {
54   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
55   PetscInt     n = a->mbs,i;
56 
57   PetscFunctionBegin;
58   *nn = n;
59   if (!ia) PetscFunctionReturn(0);
60 
61   if (oshift == 1) {
62     /* temporarily add 1 to i and j indices */
63     PetscInt nz = a->i[n];
64     for (i=0; i<nz; i++) a->j[i]++;
65     for (i=0; i<n+1; i++) a->i[i]++;
66     *ia = a->i; *ja = a->j;
67   } else {
68     *ia = a->i; *ja = a->j;
69   }
70   PetscFunctionReturn(0);
71 }
72 
73 #undef __FUNCT__
74 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
75 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
76 {
77   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
78   PetscInt     i,n = a->mbs;
79 
80   PetscFunctionBegin;
81   if (!ia) PetscFunctionReturn(0);
82 
83   if (oshift == 1) {
84     PetscInt nz = a->i[n]-1;
85     for (i=0; i<nz; i++) a->j[i]--;
86     for (i=0; i<n+1; i++) a->i[i]--;
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
93 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
94 {
95   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
96   PetscErrorCode ierr;
97 
98   PetscFunctionBegin;
99   PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz);
100   ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr);
101   if (a->row) {
102     ierr = ISDestroy(a->row);CHKERRQ(ierr);
103   }
104   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
105   if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
106   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
107   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
108   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
109   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
110   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
111   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
112 
113   if (a->inew){
114     ierr = PetscFree(a->inew);CHKERRQ(ierr);
115     a->inew = 0;
116   }
117   ierr = PetscFree(a);CHKERRQ(ierr);
118 
119   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
120   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
121   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
122   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
123   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr);
124   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
125   PetscFunctionReturn(0);
126 }
127 
128 #undef __FUNCT__
129 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
130 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op)
131 {
132   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
133   PetscErrorCode ierr;
134 
135   PetscFunctionBegin;
136   switch (op) {
137   case MAT_ROW_ORIENTED:
138     a->roworiented = PETSC_TRUE;
139     break;
140   case MAT_COLUMN_ORIENTED:
141     a->roworiented = PETSC_FALSE;
142     break;
143   case MAT_COLUMNS_SORTED:
144     a->sorted = PETSC_TRUE;
145     break;
146   case MAT_COLUMNS_UNSORTED:
147     a->sorted = PETSC_FALSE;
148     break;
149   case MAT_KEEP_ZEROED_ROWS:
150     a->keepzeroedrows = PETSC_TRUE;
151     break;
152   case MAT_NO_NEW_NONZERO_LOCATIONS:
153     a->nonew = 1;
154     break;
155   case MAT_NEW_NONZERO_LOCATION_ERR:
156     a->nonew = -1;
157     break;
158   case MAT_NEW_NONZERO_ALLOCATION_ERR:
159     a->nonew = -2;
160     break;
161   case MAT_YES_NEW_NONZERO_LOCATIONS:
162     a->nonew = 0;
163     break;
164   case MAT_ROWS_SORTED:
165   case MAT_ROWS_UNSORTED:
166   case MAT_YES_NEW_DIAGONALS:
167   case MAT_IGNORE_OFF_PROC_ENTRIES:
168   case MAT_USE_HASH_TABLE:
169     ierr = PetscLogInfo((A,"MatSetOption_SeqSBAIJ:Option ignored\n"));CHKERRQ(ierr);
170     break;
171   case MAT_NO_NEW_DIAGONALS:
172     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
173   case MAT_NOT_SYMMETRIC:
174   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
175   case MAT_HERMITIAN:
176     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
177   case MAT_SYMMETRIC:
178   case MAT_STRUCTURALLY_SYMMETRIC:
179   case MAT_NOT_HERMITIAN:
180   case MAT_SYMMETRY_ETERNAL:
181   case MAT_NOT_SYMMETRY_ETERNAL:
182   case MAT_IGNORE_LOWER_TRIANGULAR:
183     a->ignore_ltriangular = PETSC_TRUE;
184     break;
185   case MAT_ERROR_LOWER_TRIANGULAR:
186     a->ignore_ltriangular = PETSC_FALSE;
187     break;
188   default:
189     SETERRQ(PETSC_ERR_SUP,"unknown option");
190   }
191   PetscFunctionReturn(0);
192 }
193 
194 #undef __FUNCT__
195 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
196 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v)
197 {
198   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
199   PetscErrorCode ierr;
200   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
201   MatScalar      *aa,*aa_i;
202   PetscScalar    *v_i;
203 
204   PetscFunctionBegin;
205   bs  = A->bs;
206   ai  = a->i;
207   aj  = a->j;
208   aa  = a->a;
209   bs2 = a->bs2;
210 
211   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row);
212 
213   bn  = row/bs;   /* Block number */
214   bp  = row % bs; /* Block position */
215   M   = ai[bn+1] - ai[bn];
216   *ncols = bs*M;
217 
218   if (v) {
219     *v = 0;
220     if (*ncols) {
221       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
222       for (i=0; i<M; i++) { /* for each block in the block row */
223         v_i  = *v + i*bs;
224         aa_i = aa + bs2*(ai[bn] + i);
225         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
226       }
227     }
228   }
229 
230   if (cols) {
231     *cols = 0;
232     if (*ncols) {
233       ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr);
234       for (i=0; i<M; i++) { /* for each block in the block row */
235         cols_i = *cols + i*bs;
236         itmp  = bs*aj[ai[bn] + i];
237         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
238       }
239     }
240   }
241 
242   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
243   /* this segment is currently removed, so only entries in the upper triangle are obtained */
244 #ifdef column_search
245   v_i    = *v    + M*bs;
246   cols_i = *cols + M*bs;
247   for (i=0; i<bn; i++){ /* for each block row */
248     M = ai[i+1] - ai[i];
249     for (j=0; j<M; j++){
250       itmp = aj[ai[i] + j];    /* block column value */
251       if (itmp == bn){
252         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
253         for (k=0; k<bs; k++) {
254           *cols_i++ = i*bs+k;
255           *v_i++    = aa_i[k];
256         }
257         *ncols += bs;
258         break;
259       }
260     }
261   }
262 #endif
263   PetscFunctionReturn(0);
264 }
265 
266 #undef __FUNCT__
267 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
268 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
269 {
270   PetscErrorCode ierr;
271 
272   PetscFunctionBegin;
273   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
274   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
280 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B)
281 {
282   PetscErrorCode ierr;
283   PetscFunctionBegin;
284   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
285   PetscFunctionReturn(0);
286 }
287 
288 #undef __FUNCT__
289 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
290 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
291 {
292   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
293   PetscErrorCode    ierr;
294   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
295   char              *name;
296   PetscViewerFormat format;
297 
298   PetscFunctionBegin;
299   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
300   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
301   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
302     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
303   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
304     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
305   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
306     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
307     for (i=0; i<a->mbs; i++) {
308       for (j=0; j<bs; j++) {
309         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
310         for (k=a->i[i]; k<a->i[i+1]; k++) {
311           for (l=0; l<bs; l++) {
312 #if defined(PETSC_USE_COMPLEX)
313             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
314               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
315                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
316             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
317               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
318                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
319             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
320               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
321             }
322 #else
323             if (a->a[bs2*k + l*bs + j] != 0.0) {
324               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
325             }
326 #endif
327           }
328         }
329         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
330       }
331     }
332     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
333   } else {
334     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
335     for (i=0; i<a->mbs; i++) {
336       for (j=0; j<bs; j++) {
337         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
338         for (k=a->i[i]; k<a->i[i+1]; k++) {
339           for (l=0; l<bs; l++) {
340 #if defined(PETSC_USE_COMPLEX)
341             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
342               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
343                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
344             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
345               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
346                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
347             } else {
348               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
349             }
350 #else
351             ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
352 #endif
353           }
354         }
355         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
356       }
357     }
358     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
359   }
360   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
361   PetscFunctionReturn(0);
362 }
363 
364 #undef __FUNCT__
365 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
366 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
367 {
368   Mat            A = (Mat) Aa;
369   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
370   PetscErrorCode ierr;
371   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
372   PetscMPIInt    rank;
373   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
374   MatScalar      *aa;
375   MPI_Comm       comm;
376   PetscViewer    viewer;
377 
378   PetscFunctionBegin;
379   /*
380     This is nasty. If this is called from an originally parallel matrix
381     then all processes call this,but only the first has the matrix so the
382     rest should return immediately.
383   */
384   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
385   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
386   if (rank) PetscFunctionReturn(0);
387 
388   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
389 
390   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
391   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
392 
393   /* loop over matrix elements drawing boxes */
394   color = PETSC_DRAW_BLUE;
395   for (i=0,row=0; i<mbs; i++,row+=bs) {
396     for (j=a->i[i]; j<a->i[i+1]; j++) {
397       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
398       x_l = a->j[j]*bs; x_r = x_l + 1.0;
399       aa = a->a + j*bs2;
400       for (k=0; k<bs; k++) {
401         for (l=0; l<bs; l++) {
402           if (PetscRealPart(*aa++) >=  0.) continue;
403           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
404         }
405       }
406     }
407   }
408   color = PETSC_DRAW_CYAN;
409   for (i=0,row=0; i<mbs; i++,row+=bs) {
410     for (j=a->i[i]; j<a->i[i+1]; j++) {
411       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
412       x_l = a->j[j]*bs; x_r = x_l + 1.0;
413       aa = a->a + j*bs2;
414       for (k=0; k<bs; k++) {
415         for (l=0; l<bs; l++) {
416           if (PetscRealPart(*aa++) != 0.) continue;
417           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
418         }
419       }
420     }
421   }
422 
423   color = PETSC_DRAW_RED;
424   for (i=0,row=0; i<mbs; i++,row+=bs) {
425     for (j=a->i[i]; j<a->i[i+1]; j++) {
426       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
427       x_l = a->j[j]*bs; x_r = x_l + 1.0;
428       aa = a->a + j*bs2;
429       for (k=0; k<bs; k++) {
430         for (l=0; l<bs; l++) {
431           if (PetscRealPart(*aa++) <= 0.) continue;
432           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
433         }
434       }
435     }
436   }
437   PetscFunctionReturn(0);
438 }
439 
440 #undef __FUNCT__
441 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
442 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
443 {
444   PetscErrorCode ierr;
445   PetscReal      xl,yl,xr,yr,w,h;
446   PetscDraw      draw;
447   PetscTruth     isnull;
448 
449   PetscFunctionBegin;
450 
451   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
452   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
453 
454   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
455   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
456   xr += w;    yr += h;  xl = -w;     yl = -h;
457   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
458   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
459   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
460   PetscFunctionReturn(0);
461 }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatView_SeqSBAIJ"
465 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
466 {
467   PetscErrorCode ierr;
468   PetscTruth     iascii,isdraw;
469 
470   PetscFunctionBegin;
471   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
472   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
473   if (iascii){
474     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
475   } else if (isdraw) {
476     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
477   } else {
478     Mat B;
479     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
480     ierr = MatView(B,viewer);CHKERRQ(ierr);
481     ierr = MatDestroy(B);CHKERRQ(ierr);
482   }
483   PetscFunctionReturn(0);
484 }
485 
486 
487 #undef __FUNCT__
488 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
489 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
490 {
491   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
492   PetscInt     *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
493   PetscInt     *ai = a->i,*ailen = a->ilen;
494   PetscInt     brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
495   MatScalar    *ap,*aa = a->a,zero = 0.0;
496 
497   PetscFunctionBegin;
498   for (k=0; k<m; k++) { /* loop over rows */
499     row  = im[k]; brow = row/bs;
500     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row);
501     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
502     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
503     nrow = ailen[brow];
504     for (l=0; l<n; l++) { /* loop over columns */
505       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]);
506       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
507       col  = in[l] ;
508       bcol = col/bs;
509       cidx = col%bs;
510       ridx = row%bs;
511       high = nrow;
512       low  = 0; /* assume unsorted */
513       while (high-low > 5) {
514         t = (low+high)/2;
515         if (rp[t] > bcol) high = t;
516         else             low  = t;
517       }
518       for (i=low; i<high; i++) {
519         if (rp[i] > bcol) break;
520         if (rp[i] == bcol) {
521           *v++ = ap[bs2*i+bs*cidx+ridx];
522           goto finished;
523         }
524       }
525       *v++ = zero;
526        finished:;
527     }
528   }
529   PetscFunctionReturn(0);
530 }
531 
532 
533 #undef __FUNCT__
534 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
535 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
536 {
537   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
538   PetscErrorCode  ierr;
539   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
540   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
541   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
542   PetscTruth      roworiented=a->roworiented;
543   const MatScalar *value = v;
544   MatScalar       *ap,*aa = a->a,*bap;
545 
546   PetscFunctionBegin;
547   if (roworiented) {
548     stepval = (n-1)*bs;
549   } else {
550     stepval = (m-1)*bs;
551   }
552   for (k=0; k<m; k++) { /* loop over added rows */
553     row  = im[k];
554     if (row < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
557 #endif
558     rp   = aj + ai[row];
559     ap   = aa + bs2*ai[row];
560     rmax = imax[row];
561     nrow = ailen[row];
562     low  = 0;
563     high = nrow;
564     for (l=0; l<n; l++) { /* loop over added columns */
565       if (in[l] < 0) continue;
566       col = in[l];
567 #if defined(PETSC_USE_DEBUG)
568       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1);
569 #endif
570       if (col < row) continue; /* ignore lower triangular block */
571       if (roworiented) {
572         value = v + k*(stepval+bs)*bs + l*bs;
573       } else {
574         value = v + l*(stepval+bs)*bs + k*bs;
575       }
576       if (col < lastcol) low = 0; else high = nrow;
577       lastcol = col;
578       while (high-low > 7) {
579         t = (low+high)/2;
580         if (rp[t] > col) high = t;
581         else             low  = t;
582       }
583       for (i=low; i<high; i++) {
584         if (rp[i] > col) break;
585         if (rp[i] == col) {
586           bap  = ap +  bs2*i;
587           if (roworiented) {
588             if (is == ADD_VALUES) {
589               for (ii=0; ii<bs; ii++,value+=stepval) {
590                 for (jj=ii; jj<bs2; jj+=bs) {
591                   bap[jj] += *value++;
592                 }
593               }
594             } else {
595               for (ii=0; ii<bs; ii++,value+=stepval) {
596                 for (jj=ii; jj<bs2; jj+=bs) {
597                   bap[jj] = *value++;
598                 }
599                }
600             }
601           } else {
602             if (is == ADD_VALUES) {
603               for (ii=0; ii<bs; ii++,value+=stepval) {
604                 for (jj=0; jj<bs; jj++) {
605                   *bap++ += *value++;
606                 }
607               }
608             } else {
609               for (ii=0; ii<bs; ii++,value+=stepval) {
610                 for (jj=0; jj<bs; jj++) {
611                   *bap++  = *value++;
612                 }
613               }
614             }
615           }
616           goto noinsert2;
617         }
618       }
619       if (nonew == 1) goto noinsert2;
620       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
621       if (nrow >= rmax) {
622         /* there is no extra room in row, therefore enlarge */
623         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
624         MatScalar *new_a;
625 
626         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
627 
628         /* malloc new storage space */
629         ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);
630 
631         /* copy over old data into new slots */
632         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
633         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
634         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
635         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
636         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
637         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
638         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
639         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
640         /* free up old matrix storage */
641         ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr);
642         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
643         a->singlemalloc = PETSC_TRUE;
644 
645         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
646         rmax = imax[row] = imax[row] + CHUNKSIZE;
647         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
648         a->maxnz += bs2*CHUNKSIZE;
649         a->reallocs++;
650         a->nz++;
651       }
652       N = nrow++ - 1;
653       /* shift up all the later entries in this row */
654       for (ii=N; ii>=i; ii--) {
655         rp[ii+1] = rp[ii];
656         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
657       }
658       if (N >= i) {
659         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
660       }
661       rp[i] = col;
662       bap   = ap +  bs2*i;
663       if (roworiented) {
664         for (ii=0; ii<bs; ii++,value+=stepval) {
665           for (jj=ii; jj<bs2; jj+=bs) {
666             bap[jj] = *value++;
667           }
668         }
669       } else {
670         for (ii=0; ii<bs; ii++,value+=stepval) {
671           for (jj=0; jj<bs; jj++) {
672             *bap++  = *value++;
673           }
674         }
675        }
676     noinsert2:;
677       low = i;
678     }
679     ailen[row] = nrow;
680   }
681    PetscFunctionReturn(0);
682 }
683 
684 #undef __FUNCT__
685 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
686 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
687 {
688   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
689   PetscErrorCode ierr;
690   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
691   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
692   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
693   MatScalar      *aa = a->a,*ap;
694 
695   PetscFunctionBegin;
696   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
697 
698   if (m) rmax = ailen[0];
699   for (i=1; i<mbs; i++) {
700     /* move each row back by the amount of empty slots (fshift) before it*/
701     fshift += imax[i-1] - ailen[i-1];
702      rmax   = PetscMax(rmax,ailen[i]);
703      if (fshift) {
704        ip = aj + ai[i]; ap = aa + bs2*ai[i];
705        N = ailen[i];
706        for (j=0; j<N; j++) {
707          ip[j-fshift] = ip[j];
708          ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
709        }
710      }
711      ai[i] = ai[i-1] + ailen[i-1];
712   }
713   if (mbs) {
714     fshift += imax[mbs-1] - ailen[mbs-1];
715      ai[mbs] = ai[mbs-1] + ailen[mbs-1];
716   }
717   /* reset ilen and imax for each row */
718   for (i=0; i<mbs; i++) {
719     ailen[i] = imax[i] = ai[i+1] - ai[i];
720   }
721   a->nz = ai[mbs];
722 
723   /* diagonals may have moved, reset it */
724   if (a->diag) {
725     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
726   }
727   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",
728                        m,A->m,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr);
729   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr);
730   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr);
731   a->reallocs          = 0;
732   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
733   PetscFunctionReturn(0);
734 }
735 
736 /*
737    This function returns an array of flags which indicate the locations of contiguous
738    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
739    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
740    Assume: sizes should be long enough to hold all the values.
741 */
742 #undef __FUNCT__
743 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
744 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
745 {
746   PetscInt   i,j,k,row;
747   PetscTruth flg;
748 
749   PetscFunctionBegin;
750    for (i=0,j=0; i<n; j++) {
751      row = idx[i];
752      if (row%bs!=0) { /* Not the begining of a block */
753        sizes[j] = 1;
754        i++;
755      } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
756        sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
757        i++;
758      } else { /* Begining of the block, so check if the complete block exists */
759        flg = PETSC_TRUE;
760        for (k=1; k<bs; k++) {
761          if (row+k != idx[i+k]) { /* break in the block */
762            flg = PETSC_FALSE;
763            break;
764          }
765        }
766        if (flg) { /* No break in the bs */
767          sizes[j] = bs;
768          i+= bs;
769        } else {
770          sizes[j] = 1;
771          i++;
772        }
773      }
774    }
775    *bs_max = j;
776    PetscFunctionReturn(0);
777 }
778 
779 
780 /* Only add/insert a(i,j) with i<=j (blocks).
781    Any a(i,j) with i>j input by user is ingored.
782 */
783 
784 #undef __FUNCT__
785 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
786 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
787 {
788   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
789   PetscErrorCode ierr;
790   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
791   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
792   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
793   PetscInt       ridx,cidx,bs2=a->bs2;
794   MatScalar      *ap,value,*aa=a->a,*bap;
795 
796   PetscFunctionBegin;
797   for (k=0; k<m; k++) { /* loop over added rows */
798     row  = im[k];       /* row number */
799     brow = row/bs;      /* block row number */
800     if (row < 0) continue;
801 #if defined(PETSC_USE_DEBUG)
802     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
803 #endif
804     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
805     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
806     rmax = imax[brow];  /* maximum space allocated for this row */
807     nrow = ailen[brow]; /* actual length of this row */
808     low  = 0;
809 
810     for (l=0; l<n; l++) { /* loop over added columns */
811       if (in[l] < 0) continue;
812 #if defined(PETSC_USE_DEBUG)
813       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1);
814 #endif
815       col = in[l];
816       bcol = col/bs;              /* block col number */
817 
818       if (brow > bcol) {
819         if (a->ignore_ltriangular){
820           continue; /* ignore lower triangular values */
821         } else {
822           SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)");
823         }
824       }
825 
826       ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
827       if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
828         /* element value a(k,l) */
829         if (roworiented) {
830           value = v[l + k*n];
831         } else {
832           value = v[k + l*m];
833         }
834 
835         /* move pointer bap to a(k,l) quickly and add/insert value */
836         if (col < lastcol) low = 0; high = nrow;
837         lastcol = col;
838         while (high-low > 7) {
839           t = (low+high)/2;
840           if (rp[t] > bcol) high = t;
841           else              low  = t;
842         }
843         for (i=low; i<high; i++) {
844           if (rp[i] > bcol) break;
845           if (rp[i] == bcol) {
846             bap  = ap +  bs2*i + bs*cidx + ridx;
847             if (is == ADD_VALUES) *bap += value;
848             else                  *bap  = value;
849             /* for diag block, add/insert its symmetric element a(cidx,ridx) */
850             if (brow == bcol && ridx < cidx){
851               bap  = ap +  bs2*i + bs*ridx + cidx;
852               if (is == ADD_VALUES) *bap += value;
853               else                  *bap  = value;
854             }
855             goto noinsert1;
856           }
857         }
858 
859         if (nonew == 1) goto noinsert1;
860         else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
861         if (nrow >= rmax) {
862           /* there is no extra room in row, therefore enlarge */
863           PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
864           MatScalar *new_a;
865 
866           if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
867 
868           /* Malloc new storage space */
869           ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);
870 
871           /* copy over old data into new slots */
872           for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
873           for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
874           ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
875           len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
876           ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
877           ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
878           ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
879           ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
880           /* free up old matrix storage */
881           ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr);
882           aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
883           a->singlemalloc = PETSC_TRUE;
884 
885           rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
886           rmax = imax[brow] = imax[brow] + CHUNKSIZE;
887           ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
888           a->maxnz += bs2*CHUNKSIZE;
889           a->reallocs++;
890           a->nz++;
891         }
892 
893         N = nrow++ - 1;
894         /* shift up all the later entries in this row */
895         for (ii=N; ii>=i; ii--) {
896           rp[ii+1] = rp[ii];
897           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
898         }
899         if (N>=i) {
900           ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
901         }
902         rp[i]                      = bcol;
903         ap[bs2*i + bs*cidx + ridx] = value;
904       noinsert1:;
905         low = i;
906       }
907     }   /* end of loop over added columns */
908     ailen[brow] = nrow;
909   }   /* end of loop over added rows */
910   PetscFunctionReturn(0);
911 }
912 
913 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
914 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
915 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt);
916 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*);
917 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]);
918 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat);
919 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
920 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
921 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec);
922 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
923 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
924 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat);
925 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec);
926 
927 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
928 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
929 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
930 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
931 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
932 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
933 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
934 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
935 
936 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
937 
938 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
939 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
940 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
941 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
942 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
943 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
944 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
945 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
946 
947 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*);
948 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*);
949 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*);
950 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*);
951 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*);
952 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*);
953 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*);
954 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*);
955 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*);
956 
957 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
958 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
959 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
960 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
961 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
962 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
963 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
964 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
965 
966 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
967 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
968 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
969 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
970 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
971 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
972 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
973 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
974 
975 #ifdef HAVE_MatICCFactor
976 /* modified from MatILUFactor_SeqSBAIJ, needs further work!  */
977 #undef __FUNCT__
978 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
979 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
980 {
981   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
982   Mat            outA;
983   PetscErrorCode ierr;
984   PetscTruth     row_identity,col_identity;
985 
986   PetscFunctionBegin;
987   outA          = inA;
988   inA->factor   = FACTOR_CHOLESKY;
989 
990   if (!a->diag) {
991     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
992   }
993   /*
994     Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
995     for ILU(0) factorization with natural ordering
996   */
997   switch (a->bs) {
998   case 1:
999     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1000     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1001     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1002     ierr = PetscLoginfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"));CHKERRQ(ierr);
1003   case 2:
1004     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1005     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1006     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1007     ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr);
1008     break;
1009   case 3:
1010      inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1011      inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1012      inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1013      ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr);
1014      break;
1015   case 4:
1016     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1017     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1018     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1019     ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr);
1020     break;
1021   case 5:
1022     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1023     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1024     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1025     ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr);
1026     break;
1027   case 6:
1028     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1029     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1030     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1031     ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr);
1032     break;
1033   case 7:
1034     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1035     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1036     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1037     ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr);
1038     break;
1039   default:
1040     a->row        = row;
1041     a->icol       = col;
1042     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1043     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1044 
1045     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1046     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1047     ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1048 
1049     if (!a->solve_work) {
1050       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1051       ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1052     }
1053   }
1054 
1055   ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1056   PetscFunctionReturn(0);
1057 }
1058 #endif
1059 
1060 #undef __FUNCT__
1061 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1062 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A)
1063 {
1064   static PetscTruth called = PETSC_FALSE;
1065   MPI_Comm          comm = A->comm;
1066   PetscErrorCode    ierr;
1067 
1068   PetscFunctionBegin;
1069   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1070   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1071   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1072   ierr = (*PetscHelpPrintf)(comm,"  -mat_ignore_ltriangular: Ignore lower triangular values set by user\n");CHKERRQ(ierr);
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 EXTERN_C_BEGIN
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1079 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices)
1080 {
1081   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1082   PetscInt     i,nz,n;
1083 
1084   PetscFunctionBegin;
1085   nz = baij->maxnz;
1086   n  = mat->n;
1087   for (i=0; i<nz; i++) {
1088     baij->j[i] = indices[i];
1089   }
1090    baij->nz = nz;
1091    for (i=0; i<n; i++) {
1092      baij->ilen[i] = baij->imax[i];
1093    }
1094    PetscFunctionReturn(0);
1095 }
1096 EXTERN_C_END
1097 
1098 #undef __FUNCT__
1099 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1100 /*@
1101   MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1102   in the matrix.
1103 
1104   Input Parameters:
1105   +  mat     - the SeqSBAIJ matrix
1106   -  indices - the column indices
1107 
1108   Level: advanced
1109 
1110   Notes:
1111   This can be called if you have precomputed the nonzero structure of the
1112   matrix and want to provide it to the matrix object to improve the performance
1113   of the MatSetValues() operation.
1114 
1115   You MUST have set the correct numbers of nonzeros per row in the call to
1116   MatCreateSeqSBAIJ(), and the columns indices MUST be sorted.
1117 
1118   MUST be called before any calls to MatSetValues()
1119 
1120   .seealso: MatCreateSeqSBAIJ
1121 @*/
1122 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1123 {
1124   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1125 
1126   PetscFunctionBegin;
1127   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1128   PetscValidPointer(indices,2);
1129   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1130   if (f) {
1131     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1132   } else {
1133     SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices");
1134   }
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 #undef __FUNCT__
1139 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1140 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1141 {
1142   PetscErrorCode ierr;
1143 
1144   PetscFunctionBegin;
1145   ierr =  MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #undef __FUNCT__
1150 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1151 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1152 {
1153   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1154   PetscFunctionBegin;
1155   *array = a->a;
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 #undef __FUNCT__
1160 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1161 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1162 {
1163   PetscFunctionBegin;
1164   PetscFunctionReturn(0);
1165  }
1166 
1167 #include "petscblaslapack.h"
1168 #undef __FUNCT__
1169 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1170 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1171 {
1172   Mat_SeqSBAIJ   *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1173   PetscErrorCode ierr;
1174   PetscInt       i,bs=Y->bs,bs2,j;
1175   PetscBLASInt   bnz = (PetscBLASInt)x->nz,one = 1;
1176 
1177   PetscFunctionBegin;
1178   if (str == SAME_NONZERO_PATTERN) {
1179     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1180   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1181     if (y->xtoy && y->XtoY != X) {
1182       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1183       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1184     }
1185     if (!y->xtoy) { /* get xtoy */
1186       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1187       y->XtoY = X;
1188     }
1189     bs2 = bs*bs;
1190     for (i=0; i<x->nz; i++) {
1191       j = 0;
1192       while (j < bs2){
1193         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1194         j++;
1195       }
1196     }
1197     ierr = PetscLogInfo((0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
1198   } else {
1199     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1200   }
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 #undef __FUNCT__
1205 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1206 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1207 {
1208   PetscFunctionBegin;
1209   *flg = PETSC_TRUE;
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1215 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1216 {
1217    PetscFunctionBegin;
1218    *flg = PETSC_TRUE;
1219    PetscFunctionReturn(0);
1220 }
1221 
1222 #undef __FUNCT__
1223 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1224 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1225  {
1226    PetscFunctionBegin;
1227    *flg = PETSC_FALSE;
1228    PetscFunctionReturn(0);
1229  }
1230 
1231 /* -------------------------------------------------------------------*/
1232 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1233        MatGetRow_SeqSBAIJ,
1234        MatRestoreRow_SeqSBAIJ,
1235        MatMult_SeqSBAIJ_N,
1236 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1237        MatMult_SeqSBAIJ_N,
1238        MatMultAdd_SeqSBAIJ_N,
1239        MatSolve_SeqSBAIJ_N,
1240        0,
1241        0,
1242 /*10*/ 0,
1243        0,
1244        MatCholeskyFactor_SeqSBAIJ,
1245        MatRelax_SeqSBAIJ,
1246        MatTranspose_SeqSBAIJ,
1247 /*15*/ MatGetInfo_SeqSBAIJ,
1248        MatEqual_SeqSBAIJ,
1249        MatGetDiagonal_SeqSBAIJ,
1250        MatDiagonalScale_SeqSBAIJ,
1251        MatNorm_SeqSBAIJ,
1252 /*20*/ 0,
1253        MatAssemblyEnd_SeqSBAIJ,
1254        0,
1255        MatSetOption_SeqSBAIJ,
1256        MatZeroEntries_SeqSBAIJ,
1257 /*25*/ 0,
1258        0,
1259        0,
1260        MatCholeskyFactorSymbolic_SeqSBAIJ,
1261        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1262 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1263        0,
1264        MatICCFactorSymbolic_SeqSBAIJ,
1265        MatGetArray_SeqSBAIJ,
1266        MatRestoreArray_SeqSBAIJ,
1267 /*35*/ MatDuplicate_SeqSBAIJ,
1268        0,
1269        0,
1270        0,
1271        0,
1272 /*40*/ MatAXPY_SeqSBAIJ,
1273        MatGetSubMatrices_SeqSBAIJ,
1274        MatIncreaseOverlap_SeqSBAIJ,
1275        MatGetValues_SeqSBAIJ,
1276        0,
1277 /*45*/ MatPrintHelp_SeqSBAIJ,
1278        MatScale_SeqSBAIJ,
1279        0,
1280        0,
1281        0,
1282 /*50*/ 0,
1283        MatGetRowIJ_SeqSBAIJ,
1284        MatRestoreRowIJ_SeqSBAIJ,
1285        0,
1286        0,
1287 /*55*/ 0,
1288        0,
1289        0,
1290        0,
1291        MatSetValuesBlocked_SeqSBAIJ,
1292 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1293        0,
1294        0,
1295        MatGetPetscMaps_Petsc,
1296        0,
1297 /*65*/ 0,
1298        0,
1299        0,
1300        0,
1301        0,
1302 /*70*/ MatGetRowMax_SeqSBAIJ,
1303        0,
1304        0,
1305        0,
1306        0,
1307 /*75*/ 0,
1308        0,
1309        0,
1310        0,
1311        0,
1312 /*80*/ 0,
1313        0,
1314        0,
1315 #if !defined(PETSC_USE_COMPLEX)
1316        MatGetInertia_SeqSBAIJ,
1317 #else
1318        0,
1319 #endif
1320        MatLoad_SeqSBAIJ,
1321 /*85*/ MatIsSymmetric_SeqSBAIJ,
1322        MatIsHermitian_SeqSBAIJ,
1323        MatIsStructurallySymmetric_SeqSBAIJ,
1324        0,
1325        0,
1326 /*90*/ 0,
1327        0,
1328        0,
1329        0,
1330        0,
1331 /*95*/ 0,
1332        0,
1333        0,
1334        0};
1335 
1336 EXTERN_C_BEGIN
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1339 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat)
1340 {
1341   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1342   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
1343   PetscErrorCode ierr;
1344 
1345   PetscFunctionBegin;
1346   if (aij->nonew != 1) {
1347     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1348   }
1349 
1350   /* allocate space for values if not already there */
1351   if (!aij->saved_values) {
1352     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1353   }
1354 
1355   /* copy values over */
1356   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1357   PetscFunctionReturn(0);
1358 }
1359 EXTERN_C_END
1360 
1361 EXTERN_C_BEGIN
1362 #undef __FUNCT__
1363 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1364 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat)
1365 {
1366   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1367   PetscErrorCode ierr;
1368   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
1369 
1370   PetscFunctionBegin;
1371   if (aij->nonew != 1) {
1372     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1373   }
1374   if (!aij->saved_values) {
1375     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1376   }
1377 
1378   /* copy values over */
1379   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1380   PetscFunctionReturn(0);
1381 }
1382 EXTERN_C_END
1383 
1384 EXTERN_C_BEGIN
1385 #undef __FUNCT__
1386 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1387 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
1388 {
1389   Mat_SeqSBAIJ   *b = (Mat_SeqSBAIJ*)B->data;
1390   PetscErrorCode ierr;
1391   PetscInt       i,mbs,bs2;
1392   PetscTruth     skipallocation = PETSC_FALSE,flg;
1393 
1394   PetscFunctionBegin;
1395   B->preallocated = PETSC_TRUE;
1396   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1397   mbs  = B->m/bs;
1398   bs2  = bs*bs;
1399 
1400   if (mbs*bs != B->m) {
1401     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1402   }
1403 
1404   if (nz == MAT_SKIP_ALLOCATION) {
1405     skipallocation = PETSC_TRUE;
1406     nz             = 0;
1407   }
1408 
1409   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1410   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
1411   if (nnz) {
1412     for (i=0; i<mbs; i++) {
1413       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
1414       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs);
1415     }
1416   }
1417 
1418   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1419   if (!flg) {
1420     switch (bs) {
1421     case 1:
1422       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1423       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1424       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1425       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1426       B->ops->mult            = MatMult_SeqSBAIJ_1;
1427       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1428       break;
1429     case 2:
1430       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1431       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1432       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_2;
1433       B->ops->mult            = MatMult_SeqSBAIJ_2;
1434       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1435       break;
1436     case 3:
1437       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1438       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1439       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_3;
1440       B->ops->mult            = MatMult_SeqSBAIJ_3;
1441       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1442       break;
1443     case 4:
1444       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1445       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1446       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_4;
1447       B->ops->mult            = MatMult_SeqSBAIJ_4;
1448       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1449       break;
1450     case 5:
1451       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1452       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1453       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_5;
1454       B->ops->mult            = MatMult_SeqSBAIJ_5;
1455       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1456       break;
1457     case 6:
1458       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1459       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1460       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_6;
1461       B->ops->mult            = MatMult_SeqSBAIJ_6;
1462       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1463       break;
1464     case 7:
1465       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1466       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1467       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_7;
1468       B->ops->mult            = MatMult_SeqSBAIJ_7;
1469       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1470       break;
1471     }
1472   }
1473 
1474   b->mbs = mbs;
1475   b->nbs = mbs;
1476   if (!skipallocation) {
1477     /* b->ilen will count nonzeros in each block row so far. */
1478     ierr   = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
1479     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1480     if (!nnz) {
1481       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1482       else if (nz <= 0)        nz = 1;
1483       for (i=0; i<mbs; i++) {
1484         b->imax[i] = nz;
1485       }
1486       nz = nz*mbs; /* total nz */
1487     } else {
1488       nz = 0;
1489       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1490     }
1491     /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1492 
1493     /* allocate the matrix space */
1494     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr);
1495     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1496     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1497     b->singlemalloc = PETSC_TRUE;
1498 
1499     /* pointer to beginning of each row */
1500     b->i[0] = 0;
1501     for (i=1; i<mbs+1; i++) {
1502       b->i[i] = b->i[i-1] + b->imax[i-1];
1503     }
1504   }
1505 
1506   B->bs               = bs;
1507   b->bs2              = bs2;
1508   b->nz             = 0;
1509   b->maxnz          = nz*bs2;
1510 
1511   b->inew             = 0;
1512   b->jnew             = 0;
1513   b->anew             = 0;
1514   b->a2anew           = 0;
1515   b->permute          = PETSC_FALSE;
1516   PetscFunctionReturn(0);
1517 }
1518 EXTERN_C_END
1519 
1520 EXTERN_C_BEGIN
1521 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
1522 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,MatReuse,Mat*);
1523 EXTERN_C_END
1524 
1525 /*MC
1526   MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1527   based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1528 
1529   Options Database Keys:
1530   . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1531 
1532   Level: beginner
1533 
1534   .seealso: MatCreateSeqSBAIJ
1535 M*/
1536 
1537 EXTERN_C_BEGIN
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1540 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B)
1541 {
1542   Mat_SeqSBAIJ   *b;
1543   PetscErrorCode ierr;
1544   PetscMPIInt    size;
1545   PetscTruth     flg;
1546 
1547   PetscFunctionBegin;
1548   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1549   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1550   B->m = B->M = PetscMax(B->m,B->M);
1551   B->n = B->N = PetscMax(B->n,B->N);
1552 
1553   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1554   B->data = (void*)b;
1555   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1556   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1557   B->ops->view        = MatView_SeqSBAIJ;
1558   B->factor           = 0;
1559   B->mapping          = 0;
1560   b->row              = 0;
1561   b->icol             = 0;
1562   b->reallocs         = 0;
1563   b->saved_values     = 0;
1564 
1565   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1566   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1567 
1568   b->sorted           = PETSC_FALSE;
1569   b->roworiented      = PETSC_TRUE;
1570   b->nonew            = 0;
1571   b->diag             = 0;
1572   b->solve_work       = 0;
1573   b->mult_work        = 0;
1574   B->spptr            = 0;
1575   b->keepzeroedrows   = PETSC_FALSE;
1576   b->xtoy             = 0;
1577   b->XtoY             = 0;
1578 
1579   b->inew             = 0;
1580   b->jnew             = 0;
1581   b->anew             = 0;
1582   b->a2anew           = 0;
1583   b->permute          = PETSC_FALSE;
1584 
1585   b->ignore_ltriangular = PETSC_FALSE;
1586   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr);
1587   if (flg) b->ignore_ltriangular = PETSC_TRUE;
1588 
1589   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1590                                      "MatStoreValues_SeqSBAIJ",
1591                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1592   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1593                                      "MatRetrieveValues_SeqSBAIJ",
1594                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1595   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1596                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1597                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1598   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1599                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1600                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1601   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1602                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1603                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1604   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1605                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1606                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1607 
1608   B->symmetric                  = PETSC_TRUE;
1609   B->structurally_symmetric     = PETSC_TRUE;
1610   B->symmetric_set              = PETSC_TRUE;
1611   B->structurally_symmetric_set = PETSC_TRUE;
1612   PetscFunctionReturn(0);
1613 }
1614 EXTERN_C_END
1615 
1616 #undef __FUNCT__
1617 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1618 /*@C
1619    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1620    compressed row) format.  For good matrix assembly performance the
1621    user should preallocate the matrix storage by setting the parameter nz
1622    (or the array nnz).  By setting these parameters accurately, performance
1623    during matrix assembly can be increased by more than a factor of 50.
1624 
1625    Collective on Mat
1626 
1627    Input Parameters:
1628 +  A - the symmetric matrix
1629 .  bs - size of block
1630 .  nz - number of block nonzeros per block row (same for all rows)
1631 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1632          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1633 
1634    Options Database Keys:
1635 .   -mat_no_unroll - uses code that does not unroll the loops in the
1636                      block calculations (much slower)
1637 .    -mat_block_size - size of the blocks to use
1638 
1639    Level: intermediate
1640 
1641    Notes:
1642    Specify the preallocated storage with either nz or nnz (not both).
1643    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1644    allocation.  For additional details, see the users manual chapter on
1645    matrices.
1646 
1647    If the nnz parameter is given then the nz parameter is ignored
1648 
1649 
1650 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1651 @*/
1652 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
1653 {
1654   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
1655 
1656   PetscFunctionBegin;
1657   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1658   if (f) {
1659     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1660   }
1661   PetscFunctionReturn(0);
1662 }
1663 
1664 #undef __FUNCT__
1665 #define __FUNCT__ "MatCreateSeqSBAIJ"
1666 /*@C
1667    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1668    compressed row) format.  For good matrix assembly performance the
1669    user should preallocate the matrix storage by setting the parameter nz
1670    (or the array nnz).  By setting these parameters accurately, performance
1671    during matrix assembly can be increased by more than a factor of 50.
1672 
1673    Collective on MPI_Comm
1674 
1675    Input Parameters:
1676 +  comm - MPI communicator, set to PETSC_COMM_SELF
1677 .  bs - size of block
1678 .  m - number of rows, or number of columns
1679 .  nz - number of block nonzeros per block row (same for all rows)
1680 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1681          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1682 
1683    Output Parameter:
1684 .  A - the symmetric matrix
1685 
1686    Options Database Keys:
1687 .   -mat_no_unroll - uses code that does not unroll the loops in the
1688                      block calculations (much slower)
1689 .    -mat_block_size - size of the blocks to use
1690 
1691    Level: intermediate
1692 
1693    Notes:
1694 
1695    Specify the preallocated storage with either nz or nnz (not both).
1696    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1697    allocation.  For additional details, see the users manual chapter on
1698    matrices.
1699 
1700    If the nnz parameter is given then the nz parameter is ignored
1701 
1702 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1703 @*/
1704 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1705 {
1706   PetscErrorCode ierr;
1707 
1708   PetscFunctionBegin;
1709   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1710   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1711   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
1712   PetscFunctionReturn(0);
1713 }
1714 
1715 #undef __FUNCT__
1716 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1717 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1718 {
1719   Mat            C;
1720   Mat_SeqSBAIJ   *c,*a = (Mat_SeqSBAIJ*)A->data;
1721   PetscErrorCode ierr;
1722   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1723 
1724   PetscFunctionBegin;
1725   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1726 
1727   *B = 0;
1728   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1729   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1730   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1731   c    = (Mat_SeqSBAIJ*)C->data;
1732 
1733   C->preallocated   = PETSC_TRUE;
1734   C->factor         = A->factor;
1735   c->row            = 0;
1736   c->icol           = 0;
1737   c->saved_values   = 0;
1738   c->keepzeroedrows = a->keepzeroedrows;
1739   C->assembled      = PETSC_TRUE;
1740 
1741   C->M    = A->M;
1742   C->N    = A->N;
1743   C->bs   = A->bs;
1744   c->bs2  = a->bs2;
1745   c->mbs  = a->mbs;
1746   c->nbs  = a->nbs;
1747 
1748   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr);
1749   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr);
1750   for (i=0; i<mbs; i++) {
1751     c->imax[i] = a->imax[i];
1752     c->ilen[i] = a->ilen[i];
1753   }
1754   ierr = PetscLogObjectMemory(C,2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1755 
1756   /* allocate the matrix space */
1757   ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
1758   c->singlemalloc = PETSC_TRUE;
1759   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1760   ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr);
1761   if (mbs > 0) {
1762     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1763     if (cpvalues == MAT_COPY_VALUES) {
1764       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1765     } else {
1766       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1767     }
1768   }
1769 
1770   c->sorted      = a->sorted;
1771   c->roworiented = a->roworiented;
1772   c->nonew       = a->nonew;
1773 
1774   if (a->diag) {
1775     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
1776     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1777     for (i=0; i<mbs; i++) {
1778       c->diag[i] = a->diag[i];
1779     }
1780   } else c->diag        = 0;
1781   c->nz               = a->nz;
1782   c->maxnz            = a->maxnz;
1783   c->solve_work         = 0;
1784   c->mult_work          = 0;
1785   *B = C;
1786   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 #undef __FUNCT__
1791 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1792 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1793 {
1794   Mat_SeqSBAIJ   *a;
1795   Mat            B;
1796   PetscErrorCode ierr;
1797   int            fd;
1798   PetscMPIInt    size;
1799   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
1800   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1801   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
1802   PetscInt       *masked,nmask,tmp,bs2,ishift;
1803   PetscScalar    *aa;
1804   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1805 
1806   PetscFunctionBegin;
1807   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1808   bs2  = bs*bs;
1809 
1810   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1811   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1812   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1813   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1814   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1815   M = header[1]; N = header[2]; nz = header[3];
1816 
1817   if (header[3] < 0) {
1818     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1819   }
1820 
1821   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1822 
1823   /*
1824      This code adds extra rows to make sure the number of rows is
1825     divisible by the blocksize
1826   */
1827   mbs        = M/bs;
1828   extra_rows = bs - M + bs*(mbs);
1829   if (extra_rows == bs) extra_rows = 0;
1830   else                  mbs++;
1831   if (extra_rows) {
1832     ierr = PetscLogInfo((0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
1833   }
1834 
1835   /* read in row lengths */
1836   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1837   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1838   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1839 
1840   /* read in column indices */
1841   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1842   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1843   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1844 
1845   /* loop over row lengths determining block row lengths */
1846   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr);
1847   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1848   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
1849   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1850   masked   = mask + mbs;
1851   rowcount = 0; nzcount = 0;
1852   for (i=0; i<mbs; i++) {
1853     nmask = 0;
1854     for (j=0; j<bs; j++) {
1855       kmax = rowlengths[rowcount];
1856       for (k=0; k<kmax; k++) {
1857         tmp = jj[nzcount++]/bs;   /* block col. index */
1858         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1859       }
1860       rowcount++;
1861     }
1862     s_browlengths[i] += nmask;
1863 
1864     /* zero out the mask elements we set */
1865     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1866   }
1867 
1868   /* create our matrix */
1869   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1870   ierr = MatSetType(B,type);CHKERRQ(ierr);
1871   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr);
1872   a = (Mat_SeqSBAIJ*)B->data;
1873 
1874   /* set matrix "i" values */
1875   a->i[0] = 0;
1876   for (i=1; i<= mbs; i++) {
1877     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1878     a->ilen[i-1] = s_browlengths[i-1];
1879   }
1880   a->nz = a->i[mbs];
1881 
1882   /* read in nonzero values */
1883   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1884   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1885   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1886 
1887   /* set "a" and "j" values into matrix */
1888   nzcount = 0; jcount = 0;
1889   for (i=0; i<mbs; i++) {
1890     nzcountb = nzcount;
1891     nmask    = 0;
1892     for (j=0; j<bs; j++) {
1893       kmax = rowlengths[i*bs+j];
1894       for (k=0; k<kmax; k++) {
1895         tmp = jj[nzcount++]/bs; /* block col. index */
1896         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1897       }
1898     }
1899     /* sort the masked values */
1900     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1901 
1902     /* set "j" values into matrix */
1903     maskcount = 1;
1904     for (j=0; j<nmask; j++) {
1905       a->j[jcount++]  = masked[j];
1906       mask[masked[j]] = maskcount++;
1907     }
1908 
1909     /* set "a" values into matrix */
1910     ishift = bs2*a->i[i];
1911     for (j=0; j<bs; j++) {
1912       kmax = rowlengths[i*bs+j];
1913       for (k=0; k<kmax; k++) {
1914         tmp       = jj[nzcountb]/bs ; /* block col. index */
1915         if (tmp >= i){
1916           block     = mask[tmp] - 1;
1917           point     = jj[nzcountb] - bs*tmp;
1918           idx       = ishift + bs2*block + j + bs*point;
1919           a->a[idx] = aa[nzcountb];
1920         }
1921         nzcountb++;
1922       }
1923     }
1924     /* zero out the mask elements we set */
1925     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1926   }
1927   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1928 
1929   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1930   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1931   ierr = PetscFree(aa);CHKERRQ(ierr);
1932   ierr = PetscFree(jj);CHKERRQ(ierr);
1933   ierr = PetscFree(mask);CHKERRQ(ierr);
1934 
1935   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1936   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1937   ierr = MatView_Private(B);CHKERRQ(ierr);
1938   *A = B;
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1944 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1945 {
1946   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1947   MatScalar      *aa=a->a,*v,*v1;
1948   PetscScalar    *x,*b,*t,sum,d;
1949   PetscErrorCode ierr;
1950   PetscInt       m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j;
1951   PetscInt       nz,nz1,*vj,*vj1,i;
1952 
1953   PetscFunctionBegin;
1954   its = its*lits;
1955   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1956 
1957   if (bs > 1)
1958     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1959 
1960   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1961   if (xx != bb) {
1962     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1963   } else {
1964     b = x;
1965   }
1966 
1967   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1968 
1969   if (flag & SOR_ZERO_INITIAL_GUESS) {
1970     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1971       for (i=0; i<m; i++)
1972         t[i] = b[i];
1973 
1974       for (i=0; i<m; i++){
1975         d  = *(aa + ai[i]);  /* diag[i] */
1976         v  = aa + ai[i] + 1;
1977         vj = aj + ai[i] + 1;
1978         nz = ai[i+1] - ai[i] - 1;
1979         x[i] = omega*t[i]/d;
1980         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1981         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
1982       }
1983     }
1984 
1985     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1986       for (i=0; i<m; i++)
1987         t[i] = b[i];
1988 
1989       for (i=0; i<m-1; i++){  /* update rhs */
1990         v  = aa + ai[i] + 1;
1991         vj = aj + ai[i] + 1;
1992         nz = ai[i+1] - ai[i] - 1;
1993         while (nz--) t[*vj++] -= x[i]*(*v++);
1994         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
1995       }
1996       for (i=m-1; i>=0; i--){
1997         d  = *(aa + ai[i]);
1998         v  = aa + ai[i] + 1;
1999         vj = aj + ai[i] + 1;
2000         nz = ai[i+1] - ai[i] - 1;
2001         sum = t[i];
2002         while (nz--) sum -= x[*vj++]*(*v++);
2003         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2004         x[i] =   (1-omega)*x[i] + omega*sum/d;
2005       }
2006     }
2007     its--;
2008   }
2009 
2010   while (its--) {
2011     /*
2012        forward sweep:
2013        for i=0,...,m-1:
2014          sum[i] = (b[i] - U(i,:)x )/d[i];
2015          x[i]   = (1-omega)x[i] + omega*sum[i];
2016          b      = b - x[i]*U^T(i,:);
2017 
2018     */
2019     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2020       for (i=0; i<m; i++)
2021         t[i] = b[i];
2022 
2023       for (i=0; i<m; i++){
2024         d  = *(aa + ai[i]);  /* diag[i] */
2025         v  = aa + ai[i] + 1; v1=v;
2026         vj = aj + ai[i] + 1; vj1=vj;
2027         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2028         sum = t[i];
2029         while (nz1--) sum -= (*v1++)*x[*vj1++];
2030         x[i] = (1-omega)*x[i] + omega*sum/d;
2031         while (nz--) t[*vj++] -= x[i]*(*v++);
2032         ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr);
2033       }
2034     }
2035 
2036   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2037       /*
2038        backward sweep:
2039        b = b - x[i]*U^T(i,:), i=0,...,n-2
2040        for i=m-1,...,0:
2041          sum[i] = (b[i] - U(i,:)x )/d[i];
2042          x[i]   = (1-omega)x[i] + omega*sum[i];
2043       */
2044       for (i=0; i<m; i++)
2045         t[i] = b[i];
2046 
2047       for (i=0; i<m-1; i++){  /* update rhs */
2048         v  = aa + ai[i] + 1;
2049         vj = aj + ai[i] + 1;
2050         nz = ai[i+1] - ai[i] - 1;
2051         while (nz--) t[*vj++] -= x[i]*(*v++);
2052         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2053       }
2054       for (i=m-1; i>=0; i--){
2055         d  = *(aa + ai[i]);
2056         v  = aa + ai[i] + 1;
2057         vj = aj + ai[i] + 1;
2058         nz = ai[i+1] - ai[i] - 1;
2059         sum = t[i];
2060         while (nz--) sum -= x[*vj++]*(*v++);
2061         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2062         x[i] =   (1-omega)*x[i] + omega*sum/d;
2063       }
2064     }
2065   }
2066 
2067   ierr = PetscFree(t);CHKERRQ(ierr);
2068   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2069   if (bb != xx) {
2070     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2071   }
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 
2076 
2077 
2078 
2079 
2080