xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 2f59403fd2df48e396e6efa2e07d12d9da4cd99d)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/inline/spops.h"
9 #include "src/mat/impls/sbaij/seq/sbaij.h"
10 
11 #define CHUNKSIZE  10
12 
13 /*
14      Checks for missing diagonals
15 */
16 #undef __FUNCT__
17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A)
19 {
20   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       *diag,*jj = a->j,i;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
38   PetscErrorCode ierr;
39   PetscInt       i,mbs = a->mbs;
40 
41   PetscFunctionBegin;
42   if (a->diag) PetscFunctionReturn(0);
43 
44   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
45   ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
46   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
47   PetscFunctionReturn(0);
48 }
49 
50 #undef __FUNCT__
51 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
52 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
53 {
54   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
55   PetscInt     n = a->mbs,i;
56 
57   PetscFunctionBegin;
58   *nn = n;
59   if (!ia) PetscFunctionReturn(0);
60 
61   if (oshift == 1) {
62     /* temporarily add 1 to i and j indices */
63     PetscInt nz = a->i[n];
64     for (i=0; i<nz; i++) a->j[i]++;
65     for (i=0; i<n+1; i++) a->i[i]++;
66     *ia = a->i; *ja = a->j;
67   } else {
68     *ia = a->i; *ja = a->j;
69   }
70   PetscFunctionReturn(0);
71 }
72 
73 #undef __FUNCT__
74 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
75 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
76 {
77   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
78   PetscInt     i,n = a->mbs;
79 
80   PetscFunctionBegin;
81   if (!ia) PetscFunctionReturn(0);
82 
83   if (oshift == 1) {
84     PetscInt nz = a->i[n]-1;
85     for (i=0; i<nz; i++) a->j[i]--;
86     for (i=0; i<n+1; i++) a->i[i]--;
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
93 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
94 {
95   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
96   PetscErrorCode ierr;
97 
98   PetscFunctionBegin;
99 #if defined(PETSC_USE_LOG)
100   PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz);
101 #endif
102   if (a->freedata){
103     ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr);
104   }
105   if (a->row) {
106     ierr = ISDestroy(a->row);CHKERRQ(ierr);
107   }
108   ierr = PetscFree(a->diag);CHKERRQ(ierr);
109   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
110   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
111   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
112   ierr = PetscFree(a->solves_work);CHKERRQ(ierr);
113   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
114   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
115   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
116 
117   ierr = PetscFree(a->inew);CHKERRQ(ierr);
118   ierr = PetscFree(a);CHKERRQ(ierr);
119 
120   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
121   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
122   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
123   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
124   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr);
125   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
126   PetscFunctionReturn(0);
127 }
128 
129 #undef __FUNCT__
130 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
131 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op)
132 {
133   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
134   PetscErrorCode ierr;
135 
136   PetscFunctionBegin;
137   switch (op) {
138   case MAT_ROW_ORIENTED:
139     a->roworiented = PETSC_TRUE;
140     break;
141   case MAT_COLUMN_ORIENTED:
142     a->roworiented = PETSC_FALSE;
143     break;
144   case MAT_COLUMNS_SORTED:
145     a->sorted = PETSC_TRUE;
146     break;
147   case MAT_COLUMNS_UNSORTED:
148     a->sorted = PETSC_FALSE;
149     break;
150   case MAT_KEEP_ZEROED_ROWS:
151     a->keepzeroedrows = PETSC_TRUE;
152     break;
153   case MAT_NO_NEW_NONZERO_LOCATIONS:
154     a->nonew = 1;
155     break;
156   case MAT_NEW_NONZERO_LOCATION_ERR:
157     a->nonew = -1;
158     break;
159   case MAT_NEW_NONZERO_ALLOCATION_ERR:
160     a->nonew = -2;
161     break;
162   case MAT_YES_NEW_NONZERO_LOCATIONS:
163     a->nonew = 0;
164     break;
165   case MAT_ROWS_SORTED:
166   case MAT_ROWS_UNSORTED:
167   case MAT_YES_NEW_DIAGONALS:
168   case MAT_IGNORE_OFF_PROC_ENTRIES:
169   case MAT_USE_HASH_TABLE:
170     ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
171     break;
172   case MAT_NO_NEW_DIAGONALS:
173     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
174   case MAT_NOT_SYMMETRIC:
175   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
176   case MAT_HERMITIAN:
177     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
178   case MAT_SYMMETRIC:
179   case MAT_STRUCTURALLY_SYMMETRIC:
180   case MAT_NOT_HERMITIAN:
181   case MAT_SYMMETRY_ETERNAL:
182   case MAT_NOT_SYMMETRY_ETERNAL:
183   case MAT_IGNORE_LOWER_TRIANGULAR:
184     a->ignore_ltriangular = PETSC_TRUE;
185     break;
186   case MAT_ERROR_LOWER_TRIANGULAR:
187     a->ignore_ltriangular = PETSC_FALSE;
188     break;
189   case MAT_GETROW_UPPERTRIANGULAR:
190     a->getrow_utriangular = PETSC_TRUE;
191     break;
192   default:
193     SETERRQ(PETSC_ERR_SUP,"unknown option");
194   }
195   PetscFunctionReturn(0);
196 }
197 
198 #undef __FUNCT__
199 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
200 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v)
201 {
202   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
203   PetscErrorCode ierr;
204   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
205   MatScalar      *aa,*aa_i;
206   PetscScalar    *v_i;
207 
208   PetscFunctionBegin;
209   if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR) or MatGetRowUpperTriangular()");
210   /* Get the upper triangular part of the row */
211   bs  = A->rmap.bs;
212   ai  = a->i;
213   aj  = a->j;
214   aa  = a->a;
215   bs2 = a->bs2;
216 
217   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row);
218 
219   bn  = row/bs;   /* Block number */
220   bp  = row % bs; /* Block position */
221   M   = ai[bn+1] - ai[bn];
222   *ncols = bs*M;
223 
224   if (v) {
225     *v = 0;
226     if (*ncols) {
227       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
228       for (i=0; i<M; i++) { /* for each block in the block row */
229         v_i  = *v + i*bs;
230         aa_i = aa + bs2*(ai[bn] + i);
231         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
232       }
233     }
234   }
235 
236   if (cols) {
237     *cols = 0;
238     if (*ncols) {
239       ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr);
240       for (i=0; i<M; i++) { /* for each block in the block row */
241         cols_i = *cols + i*bs;
242         itmp  = bs*aj[ai[bn] + i];
243         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
244       }
245     }
246   }
247 
248   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
249   /* this segment is currently removed, so only entries in the upper triangle are obtained */
250 #ifdef column_search
251   v_i    = *v    + M*bs;
252   cols_i = *cols + M*bs;
253   for (i=0; i<bn; i++){ /* for each block row */
254     M = ai[i+1] - ai[i];
255     for (j=0; j<M; j++){
256       itmp = aj[ai[i] + j];    /* block column value */
257       if (itmp == bn){
258         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
259         for (k=0; k<bs; k++) {
260           *cols_i++ = i*bs+k;
261           *v_i++    = aa_i[k];
262         }
263         *ncols += bs;
264         break;
265       }
266     }
267   }
268 #endif
269   PetscFunctionReturn(0);
270 }
271 
272 #undef __FUNCT__
273 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
274 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
275 {
276   PetscErrorCode ierr;
277 
278   PetscFunctionBegin;
279   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
280   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
281   PetscFunctionReturn(0);
282 }
283 
284 #undef __FUNCT__
285 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ"
286 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A)
287 {
288   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
289 
290   PetscFunctionBegin;
291   a->getrow_utriangular = PETSC_TRUE;
292   PetscFunctionReturn(0);
293 }
294 #undef __FUNCT__
295 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ"
296 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A)
297 {
298   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
299 
300   PetscFunctionBegin;
301   a->getrow_utriangular = PETSC_FALSE;
302   PetscFunctionReturn(0);
303 }
304 
305 #undef __FUNCT__
306 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
307 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B)
308 {
309   PetscErrorCode ierr;
310   PetscFunctionBegin;
311   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
317 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
318 {
319   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
320   PetscErrorCode    ierr;
321   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
322   const char        *name;
323   PetscViewerFormat format;
324 
325   PetscFunctionBegin;
326   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
327   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
328   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
329     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
330   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
331     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
332   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
333     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
334     for (i=0; i<a->mbs; i++) {
335       for (j=0; j<bs; j++) {
336         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
337         for (k=a->i[i]; k<a->i[i+1]; k++) {
338           for (l=0; l<bs; l++) {
339 #if defined(PETSC_USE_COMPLEX)
340             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
341               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
342                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
343             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
344               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
345                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
346             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
347               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
348             }
349 #else
350             if (a->a[bs2*k + l*bs + j] != 0.0) {
351               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
352             }
353 #endif
354           }
355         }
356         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
357       }
358     }
359     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
360   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
361      PetscFunctionReturn(0);
362   } else {
363     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
364     for (i=0; i<a->mbs; i++) {
365       for (j=0; j<bs; j++) {
366         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
367         for (k=a->i[i]; k<a->i[i+1]; k++) {
368           for (l=0; l<bs; l++) {
369 #if defined(PETSC_USE_COMPLEX)
370             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
371               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
372                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
373             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
374               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
375                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
376             } else {
377               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
378             }
379 #else
380             ierr = PetscViewerASCIIPrintf(viewer," %D %G ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
381 #endif
382           }
383         }
384         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
385       }
386     }
387     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
388   }
389   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
390   PetscFunctionReturn(0);
391 }
392 
393 #undef __FUNCT__
394 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
395 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
396 {
397   Mat            A = (Mat) Aa;
398   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
399   PetscErrorCode ierr;
400   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
401   PetscMPIInt    rank;
402   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
403   MatScalar      *aa;
404   MPI_Comm       comm;
405   PetscViewer    viewer;
406 
407   PetscFunctionBegin;
408   /*
409     This is nasty. If this is called from an originally parallel matrix
410     then all processes call this,but only the first has the matrix so the
411     rest should return immediately.
412   */
413   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
414   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
415   if (rank) PetscFunctionReturn(0);
416 
417   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
418 
419   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
420   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
421 
422   /* loop over matrix elements drawing boxes */
423   color = PETSC_DRAW_BLUE;
424   for (i=0,row=0; i<mbs; i++,row+=bs) {
425     for (j=a->i[i]; j<a->i[i+1]; j++) {
426       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
427       x_l = a->j[j]*bs; x_r = x_l + 1.0;
428       aa = a->a + j*bs2;
429       for (k=0; k<bs; k++) {
430         for (l=0; l<bs; l++) {
431           if (PetscRealPart(*aa++) >=  0.) continue;
432           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
433         }
434       }
435     }
436   }
437   color = PETSC_DRAW_CYAN;
438   for (i=0,row=0; i<mbs; i++,row+=bs) {
439     for (j=a->i[i]; j<a->i[i+1]; j++) {
440       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
441       x_l = a->j[j]*bs; x_r = x_l + 1.0;
442       aa = a->a + j*bs2;
443       for (k=0; k<bs; k++) {
444         for (l=0; l<bs; l++) {
445           if (PetscRealPart(*aa++) != 0.) continue;
446           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
447         }
448       }
449     }
450   }
451 
452   color = PETSC_DRAW_RED;
453   for (i=0,row=0; i<mbs; i++,row+=bs) {
454     for (j=a->i[i]; j<a->i[i+1]; j++) {
455       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
456       x_l = a->j[j]*bs; x_r = x_l + 1.0;
457       aa = a->a + j*bs2;
458       for (k=0; k<bs; k++) {
459         for (l=0; l<bs; l++) {
460           if (PetscRealPart(*aa++) <= 0.) continue;
461           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
462         }
463       }
464     }
465   }
466   PetscFunctionReturn(0);
467 }
468 
469 #undef __FUNCT__
470 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
471 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
472 {
473   PetscErrorCode ierr;
474   PetscReal      xl,yl,xr,yr,w,h;
475   PetscDraw      draw;
476   PetscTruth     isnull;
477 
478   PetscFunctionBegin;
479 
480   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
481   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
482 
483   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
484   xr  = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
485   xr += w;    yr += h;  xl = -w;     yl = -h;
486   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
487   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
488   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
489   PetscFunctionReturn(0);
490 }
491 
492 #undef __FUNCT__
493 #define __FUNCT__ "MatView_SeqSBAIJ"
494 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
495 {
496   PetscErrorCode ierr;
497   PetscTruth     iascii,isdraw;
498 
499   PetscFunctionBegin;
500   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
501   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
502   if (iascii){
503     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
504   } else if (isdraw) {
505     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
506   } else {
507     Mat B;
508     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
509     ierr = MatView(B,viewer);CHKERRQ(ierr);
510     ierr = MatDestroy(B);CHKERRQ(ierr);
511   }
512   PetscFunctionReturn(0);
513 }
514 
515 
516 #undef __FUNCT__
517 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
518 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
519 {
520   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
521   PetscInt     *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
522   PetscInt     *ai = a->i,*ailen = a->ilen;
523   PetscInt     brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
524   MatScalar    *ap,*aa = a->a,zero = 0.0;
525 
526   PetscFunctionBegin;
527   for (k=0; k<m; k++) { /* loop over rows */
528     row  = im[k]; brow = row/bs;
529     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row);
530     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
531     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
532     nrow = ailen[brow];
533     for (l=0; l<n; l++) { /* loop over columns */
534       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]);
535       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
536       col  = in[l] ;
537       bcol = col/bs;
538       cidx = col%bs;
539       ridx = row%bs;
540       high = nrow;
541       low  = 0; /* assume unsorted */
542       while (high-low > 5) {
543         t = (low+high)/2;
544         if (rp[t] > bcol) high = t;
545         else             low  = t;
546       }
547       for (i=low; i<high; i++) {
548         if (rp[i] > bcol) break;
549         if (rp[i] == bcol) {
550           *v++ = ap[bs2*i+bs*cidx+ridx];
551           goto finished;
552         }
553       }
554       *v++ = zero;
555        finished:;
556     }
557   }
558   PetscFunctionReturn(0);
559 }
560 
561 
562 #undef __FUNCT__
563 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
564 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
565 {
566   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
567   PetscErrorCode  ierr;
568   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
569   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
570   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
571   PetscTruth      roworiented=a->roworiented;
572   const MatScalar *value = v;
573   MatScalar       *ap,*aa = a->a,*bap;
574 
575   PetscFunctionBegin;
576   if (roworiented) {
577     stepval = (n-1)*bs;
578   } else {
579     stepval = (m-1)*bs;
580   }
581   for (k=0; k<m; k++) { /* loop over added rows */
582     row  = im[k];
583     if (row < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
586 #endif
587     rp   = aj + ai[row];
588     ap   = aa + bs2*ai[row];
589     rmax = imax[row];
590     nrow = ailen[row];
591     low  = 0;
592     high = nrow;
593     for (l=0; l<n; l++) { /* loop over added columns */
594       if (in[l] < 0) continue;
595       col = in[l];
596 #if defined(PETSC_USE_DEBUG)
597       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1);
598 #endif
599       if (col < row) continue; /* ignore lower triangular block */
600       if (roworiented) {
601         value = v + k*(stepval+bs)*bs + l*bs;
602       } else {
603         value = v + l*(stepval+bs)*bs + k*bs;
604       }
605       if (col <= lastcol) low = 0; else high = nrow;
606       lastcol = col;
607       while (high-low > 7) {
608         t = (low+high)/2;
609         if (rp[t] > col) high = t;
610         else             low  = t;
611       }
612       for (i=low; i<high; i++) {
613         if (rp[i] > col) break;
614         if (rp[i] == col) {
615           bap  = ap +  bs2*i;
616           if (roworiented) {
617             if (is == ADD_VALUES) {
618               for (ii=0; ii<bs; ii++,value+=stepval) {
619                 for (jj=ii; jj<bs2; jj+=bs) {
620                   bap[jj] += *value++;
621                 }
622               }
623             } else {
624               for (ii=0; ii<bs; ii++,value+=stepval) {
625                 for (jj=ii; jj<bs2; jj+=bs) {
626                   bap[jj] = *value++;
627                 }
628                }
629             }
630           } else {
631             if (is == ADD_VALUES) {
632               for (ii=0; ii<bs; ii++,value+=stepval) {
633                 for (jj=0; jj<bs; jj++) {
634                   *bap++ += *value++;
635                 }
636               }
637             } else {
638               for (ii=0; ii<bs; ii++,value+=stepval) {
639                 for (jj=0; jj<bs; jj++) {
640                   *bap++  = *value++;
641                 }
642               }
643             }
644           }
645           goto noinsert2;
646         }
647       }
648       if (nonew == 1) goto noinsert2;
649       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
650       MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
651       N = nrow++ - 1; high++;
652       /* shift up all the later entries in this row */
653       for (ii=N; ii>=i; ii--) {
654         rp[ii+1] = rp[ii];
655         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
656       }
657       if (N >= i) {
658         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
659       }
660       rp[i] = col;
661       bap   = ap +  bs2*i;
662       if (roworiented) {
663         for (ii=0; ii<bs; ii++,value+=stepval) {
664           for (jj=ii; jj<bs2; jj+=bs) {
665             bap[jj] = *value++;
666           }
667         }
668       } else {
669         for (ii=0; ii<bs; ii++,value+=stepval) {
670           for (jj=0; jj<bs; jj++) {
671             *bap++  = *value++;
672           }
673         }
674        }
675     noinsert2:;
676       low = i;
677     }
678     ailen[row] = nrow;
679   }
680    PetscFunctionReturn(0);
681 }
682 
683 #undef __FUNCT__
684 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
685 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
686 {
687   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
688   PetscErrorCode ierr;
689   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
690   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
691   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
692   MatScalar      *aa = a->a,*ap;
693 
694   PetscFunctionBegin;
695   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
696 
697   if (m) rmax = ailen[0];
698   for (i=1; i<mbs; i++) {
699     /* move each row back by the amount of empty slots (fshift) before it*/
700     fshift += imax[i-1] - ailen[i-1];
701      rmax   = PetscMax(rmax,ailen[i]);
702      if (fshift) {
703        ip = aj + ai[i]; ap = aa + bs2*ai[i];
704        N = ailen[i];
705        for (j=0; j<N; j++) {
706          ip[j-fshift] = ip[j];
707          ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
708        }
709      }
710      ai[i] = ai[i-1] + ailen[i-1];
711   }
712   if (mbs) {
713     fshift += imax[mbs-1] - ailen[mbs-1];
714      ai[mbs] = ai[mbs-1] + ailen[mbs-1];
715   }
716   /* reset ilen and imax for each row */
717   for (i=0; i<mbs; i++) {
718     ailen[i] = imax[i] = ai[i+1] - ai[i];
719   }
720   a->nz = ai[mbs];
721 
722   /* diagonals may have moved, reset it */
723   if (a->diag) {
724     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
725   }
726   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
727   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
728   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
729   a->reallocs          = 0;
730   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
731   PetscFunctionReturn(0);
732 }
733 
734 /*
735    This function returns an array of flags which indicate the locations of contiguous
736    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
737    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
738    Assume: sizes should be long enough to hold all the values.
739 */
740 #undef __FUNCT__
741 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
742 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
743 {
744   PetscInt   i,j,k,row;
745   PetscTruth flg;
746 
747   PetscFunctionBegin;
748    for (i=0,j=0; i<n; j++) {
749      row = idx[i];
750      if (row%bs!=0) { /* Not the begining of a block */
751        sizes[j] = 1;
752        i++;
753      } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
754        sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
755        i++;
756      } else { /* Begining of the block, so check if the complete block exists */
757        flg = PETSC_TRUE;
758        for (k=1; k<bs; k++) {
759          if (row+k != idx[i+k]) { /* break in the block */
760            flg = PETSC_FALSE;
761            break;
762          }
763        }
764        if (flg) { /* No break in the bs */
765          sizes[j] = bs;
766          i+= bs;
767        } else {
768          sizes[j] = 1;
769          i++;
770        }
771      }
772    }
773    *bs_max = j;
774    PetscFunctionReturn(0);
775 }
776 
777 
778 /* Only add/insert a(i,j) with i<=j (blocks).
779    Any a(i,j) with i>j input by user is ingored.
780 */
781 
782 #undef __FUNCT__
783 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
784 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
785 {
786   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
787   PetscErrorCode ierr;
788   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
789   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
790   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
791   PetscInt       ridx,cidx,bs2=a->bs2;
792   MatScalar      *ap,value,*aa=a->a,*bap;
793 
794   PetscFunctionBegin;
795   for (k=0; k<m; k++) { /* loop over added rows */
796     row  = im[k];       /* row number */
797     brow = row/bs;      /* block row number */
798     if (row < 0) continue;
799 #if defined(PETSC_USE_DEBUG)
800     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
801 #endif
802     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
803     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
804     rmax = imax[brow];  /* maximum space allocated for this row */
805     nrow = ailen[brow]; /* actual length of this row */
806     low  = 0;
807 
808     for (l=0; l<n; l++) { /* loop over added columns */
809       if (in[l] < 0) continue;
810 #if defined(PETSC_USE_DEBUG)
811       if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1);
812 #endif
813       col = in[l];
814       bcol = col/bs;              /* block col number */
815 
816       if (brow > bcol) {
817         if (a->ignore_ltriangular){
818           continue; /* ignore lower triangular values */
819         } else {
820           SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)");
821         }
822       }
823 
824       ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
825       if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
826         /* element value a(k,l) */
827         if (roworiented) {
828           value = v[l + k*n];
829         } else {
830           value = v[k + l*m];
831         }
832 
833         /* move pointer bap to a(k,l) quickly and add/insert value */
834         if (col <= lastcol) low = 0; high = nrow;
835         lastcol = col;
836         while (high-low > 7) {
837           t = (low+high)/2;
838           if (rp[t] > bcol) high = t;
839           else              low  = t;
840         }
841         for (i=low; i<high; i++) {
842           if (rp[i] > bcol) break;
843           if (rp[i] == bcol) {
844             bap  = ap +  bs2*i + bs*cidx + ridx;
845             if (is == ADD_VALUES) *bap += value;
846             else                  *bap  = value;
847             /* for diag block, add/insert its symmetric element a(cidx,ridx) */
848             if (brow == bcol && ridx < cidx){
849               bap  = ap +  bs2*i + bs*ridx + cidx;
850               if (is == ADD_VALUES) *bap += value;
851               else                  *bap  = value;
852             }
853             goto noinsert1;
854           }
855         }
856 
857         if (nonew == 1) goto noinsert1;
858         if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
859         MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
860 
861         N = nrow++ - 1; high++;
862         /* shift up all the later entries in this row */
863         for (ii=N; ii>=i; ii--) {
864           rp[ii+1] = rp[ii];
865           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
866         }
867         if (N>=i) {
868           ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
869         }
870         rp[i]                      = bcol;
871         ap[bs2*i + bs*cidx + ridx] = value;
872       noinsert1:;
873         low = i;
874       }
875     }   /* end of loop over added columns */
876     ailen[brow] = nrow;
877   }   /* end of loop over added rows */
878   PetscFunctionReturn(0);
879 }
880 
881 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
882 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
883 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt);
884 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*);
885 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]);
886 EXTERN PetscErrorCode MatScale_SeqSBAIJ(Mat,PetscScalar);
887 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
888 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
889 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec);
890 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
891 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
892 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat);
893 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec);
894 
895 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
896 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
897 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
898 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
899 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
900 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
901 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
902 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
903 
904 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
905 
906 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
907 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
908 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
909 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
910 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
911 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
912 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
913 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
914 
915 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*);
916 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*);
917 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*);
918 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*);
919 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*);
920 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*);
921 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*);
922 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*);
923 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*);
924 
925 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
926 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
927 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
928 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
929 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
930 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
931 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
932 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
933 
934 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
935 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
936 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
937 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
938 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
939 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
940 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
941 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
942 
943 #ifdef HAVE_MatICCFactor
944 /* modified from MatILUFactor_SeqSBAIJ, needs further work!  */
945 #undef __FUNCT__
946 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
947 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
948 {
949   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
950   Mat            outA;
951   PetscErrorCode ierr;
952   PetscTruth     row_identity,col_identity;
953 
954   PetscFunctionBegin;
955   outA          = inA;
956   inA->factor   = FACTOR_CHOLESKY;
957 
958   if (!a->diag) {
959     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
960   }
961   /*
962     Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
963     for ILU(0) factorization with natural ordering
964   */
965   switch (a->rmap.bs) {
966   case 1:
967     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
968     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
969     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
970     ierr = PetscInfo((inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr);
971   case 2:
972     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
973     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
974     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
975     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr);
976     break;
977   case 3:
978      inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
979      inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
980      inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
981      ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr);
982      break;
983   case 4:
984     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
985     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
986     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
987     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr);
988     break;
989   case 5:
990     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
991     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
992     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
993     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr);
994     break;
995   case 6:
996     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
997     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
998     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
999     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr);
1000     break;
1001   case 7:
1002     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1003     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1004     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1005     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr);
1006     break;
1007   default:
1008     a->row        = row;
1009     a->icol       = col;
1010     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1011     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1012 
1013     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1014     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1015     ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1016 
1017     if (!a->solve_work) {
1018       ierr = PetscMalloc((A->rmap.N+a->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1019       ierr = PetscLogObjectMemory(inA,(A->rmap.N+a->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1020     }
1021   }
1022 
1023   ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 #endif
1027 
1028 #undef __FUNCT__
1029 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1030 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A)
1031 {
1032   static PetscTruth called = PETSC_FALSE;
1033   MPI_Comm          comm = A->comm;
1034   PetscErrorCode    ierr;
1035 
1036   PetscFunctionBegin;
1037   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1038   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1039   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1040   ierr = (*PetscHelpPrintf)(comm,"  -mat_ignore_lower_triangular: Ignore lower triangular values set by user\n");CHKERRQ(ierr);
1041   ierr = (*PetscHelpPrintf)(comm,"  -mat_getrow_uppertriangular: Enable MatGetRow() for retrieving the upper triangular part of the row\n");CHKERRQ(ierr);
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 EXTERN_C_BEGIN
1046 #undef __FUNCT__
1047 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1048 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices)
1049 {
1050   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1051   PetscInt     i,nz,n;
1052 
1053   PetscFunctionBegin;
1054   nz = baij->maxnz;
1055   n  = mat->cmap.n;
1056   for (i=0; i<nz; i++) {
1057     baij->j[i] = indices[i];
1058   }
1059    baij->nz = nz;
1060    for (i=0; i<n; i++) {
1061      baij->ilen[i] = baij->imax[i];
1062    }
1063    PetscFunctionReturn(0);
1064 }
1065 EXTERN_C_END
1066 
1067 #undef __FUNCT__
1068 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1069 /*@
1070   MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1071   in the matrix.
1072 
1073   Input Parameters:
1074   +  mat     - the SeqSBAIJ matrix
1075   -  indices - the column indices
1076 
1077   Level: advanced
1078 
1079   Notes:
1080   This can be called if you have precomputed the nonzero structure of the
1081   matrix and want to provide it to the matrix object to improve the performance
1082   of the MatSetValues() operation.
1083 
1084   You MUST have set the correct numbers of nonzeros per row in the call to
1085   MatCreateSeqSBAIJ(), and the columns indices MUST be sorted.
1086 
1087   MUST be called before any calls to MatSetValues()
1088 
1089   .seealso: MatCreateSeqSBAIJ
1090 @*/
1091 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1092 {
1093   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1094 
1095   PetscFunctionBegin;
1096   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1097   PetscValidPointer(indices,2);
1098   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1099   if (f) {
1100     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1101   } else {
1102     SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices");
1103   }
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatCopy_SeqSBAIJ"
1109 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str)
1110 {
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   /* If the two matrices have the same copy implementation, use fast copy. */
1115   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1116     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1117     Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1118 
1119     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1120       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1121     }
1122     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1123   } else {
1124     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1125     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1126     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1127   }
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 #undef __FUNCT__
1132 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1133 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1134 {
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr =  MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1144 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1145 {
1146   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1147   PetscFunctionBegin;
1148   *array = a->a;
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 #undef __FUNCT__
1153 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1154 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1155 {
1156   PetscFunctionBegin;
1157   PetscFunctionReturn(0);
1158  }
1159 
1160 #include "petscblaslapack.h"
1161 #undef __FUNCT__
1162 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1163 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1164 {
1165   Mat_SeqSBAIJ   *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1166   PetscErrorCode ierr;
1167   PetscInt       i,bs=Y->rmap.bs,bs2,j;
1168   PetscBLASInt   bnz = (PetscBLASInt)x->nz,one = 1;
1169 
1170   PetscFunctionBegin;
1171   if (str == SAME_NONZERO_PATTERN) {
1172     PetscScalar alpha = a;
1173     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1174   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1175     if (y->xtoy && y->XtoY != X) {
1176       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1177       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1178     }
1179     if (!y->xtoy) { /* get xtoy */
1180       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1181       y->XtoY = X;
1182     }
1183     bs2 = bs*bs;
1184     for (i=0; i<x->nz; i++) {
1185       j = 0;
1186       while (j < bs2){
1187         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1188         j++;
1189       }
1190     }
1191     ierr = PetscInfo3(0,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
1192   } else {
1193     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1194     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1195     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1196   }
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 #undef __FUNCT__
1201 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1202 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1203 {
1204   PetscFunctionBegin;
1205   *flg = PETSC_TRUE;
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 #undef __FUNCT__
1210 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1211 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1212 {
1213    PetscFunctionBegin;
1214    *flg = PETSC_TRUE;
1215    PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1220 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1221  {
1222    PetscFunctionBegin;
1223    *flg = PETSC_FALSE;
1224    PetscFunctionReturn(0);
1225  }
1226 
1227 #undef __FUNCT__
1228 #define __FUNCT__ "MatRealPart_SeqSBAIJ"
1229 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A)
1230 {
1231   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1232   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1233   PetscScalar    *aa = a->a;
1234 
1235   PetscFunctionBegin;
1236   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ"
1242 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A)
1243 {
1244   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1245   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1246   PetscScalar    *aa = a->a;
1247 
1248   PetscFunctionBegin;
1249   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 /* -------------------------------------------------------------------*/
1254 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1255        MatGetRow_SeqSBAIJ,
1256        MatRestoreRow_SeqSBAIJ,
1257        MatMult_SeqSBAIJ_N,
1258 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1259        MatMult_SeqSBAIJ_N,       /* transpose versions are same as non-transpose versions */
1260        MatMultAdd_SeqSBAIJ_N,
1261        MatSolve_SeqSBAIJ_N,
1262        0,
1263        0,
1264 /*10*/ 0,
1265        0,
1266        MatCholeskyFactor_SeqSBAIJ,
1267        MatRelax_SeqSBAIJ,
1268        MatTranspose_SeqSBAIJ,
1269 /*15*/ MatGetInfo_SeqSBAIJ,
1270        MatEqual_SeqSBAIJ,
1271        MatGetDiagonal_SeqSBAIJ,
1272        MatDiagonalScale_SeqSBAIJ,
1273        MatNorm_SeqSBAIJ,
1274 /*20*/ 0,
1275        MatAssemblyEnd_SeqSBAIJ,
1276        0,
1277        MatSetOption_SeqSBAIJ,
1278        MatZeroEntries_SeqSBAIJ,
1279 /*25*/ 0,
1280        0,
1281        0,
1282        MatCholeskyFactorSymbolic_SeqSBAIJ,
1283        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1284 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1285        0,
1286        MatICCFactorSymbolic_SeqSBAIJ,
1287        MatGetArray_SeqSBAIJ,
1288        MatRestoreArray_SeqSBAIJ,
1289 /*35*/ MatDuplicate_SeqSBAIJ,
1290        0,
1291        0,
1292        0,
1293        0,
1294 /*40*/ MatAXPY_SeqSBAIJ,
1295        MatGetSubMatrices_SeqSBAIJ,
1296        MatIncreaseOverlap_SeqSBAIJ,
1297        MatGetValues_SeqSBAIJ,
1298        MatCopy_SeqSBAIJ,
1299 /*45*/ MatPrintHelp_SeqSBAIJ,
1300        MatScale_SeqSBAIJ,
1301        0,
1302        0,
1303        0,
1304 /*50*/ 0,
1305        MatGetRowIJ_SeqSBAIJ,
1306        MatRestoreRowIJ_SeqSBAIJ,
1307        0,
1308        0,
1309 /*55*/ 0,
1310        0,
1311        0,
1312        0,
1313        MatSetValuesBlocked_SeqSBAIJ,
1314 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1315        0,
1316        0,
1317        0,
1318        0,
1319 /*65*/ 0,
1320        0,
1321        0,
1322        0,
1323        0,
1324 /*70*/ MatGetRowMax_SeqSBAIJ,
1325        0,
1326        0,
1327        0,
1328        0,
1329 /*75*/ 0,
1330        0,
1331        0,
1332        0,
1333        0,
1334 /*80*/ 0,
1335        0,
1336        0,
1337 #if !defined(PETSC_USE_COMPLEX)
1338        MatGetInertia_SeqSBAIJ,
1339 #else
1340        0,
1341 #endif
1342        MatLoad_SeqSBAIJ,
1343 /*85*/ MatIsSymmetric_SeqSBAIJ,
1344        MatIsHermitian_SeqSBAIJ,
1345        MatIsStructurallySymmetric_SeqSBAIJ,
1346        0,
1347        0,
1348 /*90*/ 0,
1349        0,
1350        0,
1351        0,
1352        0,
1353 /*95*/ 0,
1354        0,
1355        0,
1356        0,
1357        0,
1358 /*100*/0,
1359        0,
1360        0,
1361        0,
1362        0,
1363 /*105*/0,
1364        MatRealPart_SeqSBAIJ,
1365        MatImaginaryPart_SeqSBAIJ,
1366        MatGetRowUpperTriangular_SeqSBAIJ,
1367        MatRestoreRowUpperTriangular_SeqSBAIJ
1368 };
1369 
1370 EXTERN_C_BEGIN
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1373 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat)
1374 {
1375   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1376   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
1377   PetscErrorCode ierr;
1378 
1379   PetscFunctionBegin;
1380   if (aij->nonew != 1) {
1381     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1382   }
1383 
1384   /* allocate space for values if not already there */
1385   if (!aij->saved_values) {
1386     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1387   }
1388 
1389   /* copy values over */
1390   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1391   PetscFunctionReturn(0);
1392 }
1393 EXTERN_C_END
1394 
1395 EXTERN_C_BEGIN
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1398 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat)
1399 {
1400   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1401   PetscErrorCode ierr;
1402   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
1403 
1404   PetscFunctionBegin;
1405   if (aij->nonew != 1) {
1406     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1407   }
1408   if (!aij->saved_values) {
1409     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1410   }
1411 
1412   /* copy values over */
1413   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1414   PetscFunctionReturn(0);
1415 }
1416 EXTERN_C_END
1417 
1418 EXTERN_C_BEGIN
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1421 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
1422 {
1423   Mat_SeqSBAIJ   *b = (Mat_SeqSBAIJ*)B->data;
1424   PetscErrorCode ierr;
1425   PetscInt       i,mbs,bs2;
1426   PetscTruth     skipallocation = PETSC_FALSE,flg;
1427 
1428   PetscFunctionBegin;
1429   B->preallocated = PETSC_TRUE;
1430   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1431   B->rmap.bs = B->cmap.bs = bs;
1432   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1433   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1434 
1435   mbs  = B->rmap.N/bs;
1436   bs2  = bs*bs;
1437 
1438   if (mbs*bs != B->rmap.N) {
1439     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1440   }
1441 
1442   if (nz == MAT_SKIP_ALLOCATION) {
1443     skipallocation = PETSC_TRUE;
1444     nz             = 0;
1445   }
1446 
1447   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1448   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
1449   if (nnz) {
1450     for (i=0; i<mbs; i++) {
1451       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
1452       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs);
1453     }
1454   }
1455 
1456   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1457   if (!flg) {
1458     switch (bs) {
1459     case 1:
1460       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1461       B->ops->solve            = MatSolve_SeqSBAIJ_1;
1462       B->ops->solves           = MatSolves_SeqSBAIJ_1;
1463       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_1;
1464       B->ops->mult             = MatMult_SeqSBAIJ_1;
1465       B->ops->multadd          = MatMultAdd_SeqSBAIJ_1;
1466       B->ops->multtranspose    = MatMult_SeqSBAIJ_1;
1467       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1;
1468       break;
1469     case 2:
1470       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1471       B->ops->solve            = MatSolve_SeqSBAIJ_2;
1472       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_2;
1473       B->ops->mult             = MatMult_SeqSBAIJ_2;
1474       B->ops->multadd          = MatMultAdd_SeqSBAIJ_2;
1475       B->ops->multtranspose    = MatMult_SeqSBAIJ_2;
1476       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2;
1477       break;
1478     case 3:
1479       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1480       B->ops->solve            = MatSolve_SeqSBAIJ_3;
1481       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_3;
1482       B->ops->mult             = MatMult_SeqSBAIJ_3;
1483       B->ops->multadd          = MatMultAdd_SeqSBAIJ_3;
1484       B->ops->multtranspose    = MatMult_SeqSBAIJ_3;
1485       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3;
1486       break;
1487     case 4:
1488       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1489       B->ops->solve            = MatSolve_SeqSBAIJ_4;
1490       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_4;
1491       B->ops->mult             = MatMult_SeqSBAIJ_4;
1492       B->ops->multadd          = MatMultAdd_SeqSBAIJ_4;
1493       B->ops->multtranspose    = MatMult_SeqSBAIJ_4;
1494       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4;
1495       break;
1496     case 5:
1497       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1498       B->ops->solve            = MatSolve_SeqSBAIJ_5;
1499       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_5;
1500       B->ops->mult             = MatMult_SeqSBAIJ_5;
1501       B->ops->multadd          = MatMultAdd_SeqSBAIJ_5;
1502       B->ops->multtranspose    = MatMult_SeqSBAIJ_5;
1503       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5;
1504       break;
1505     case 6:
1506       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1507       B->ops->solve            = MatSolve_SeqSBAIJ_6;
1508       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_6;
1509       B->ops->mult             = MatMult_SeqSBAIJ_6;
1510       B->ops->multadd          = MatMultAdd_SeqSBAIJ_6;
1511       B->ops->multtranspose    = MatMult_SeqSBAIJ_6;
1512       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6;
1513       break;
1514     case 7:
1515       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1516       B->ops->solve            = MatSolve_SeqSBAIJ_7;
1517       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_7;
1518       B->ops->mult             = MatMult_SeqSBAIJ_7;
1519       B->ops->multadd          = MatMultAdd_SeqSBAIJ_7;
1520       B->ops->multtranspose    = MatMult_SeqSBAIJ_7;
1521       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7;
1522       break;
1523     }
1524   }
1525 
1526   b->mbs = mbs;
1527   b->nbs = mbs;
1528   if (!skipallocation) {
1529     /* b->ilen will count nonzeros in each block row so far. */
1530     ierr   = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
1531     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1532     if (!nnz) {
1533       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1534       else if (nz <= 0)        nz = 1;
1535       for (i=0; i<mbs; i++) {
1536         b->imax[i] = nz;
1537       }
1538       nz = nz*mbs; /* total nz */
1539     } else {
1540       nz = 0;
1541       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1542     }
1543     /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1544 
1545     /* allocate the matrix space */
1546     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
1547     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1548     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1549     b->singlemalloc = PETSC_TRUE;
1550 
1551     /* pointer to beginning of each row */
1552     b->i[0] = 0;
1553     for (i=1; i<mbs+1; i++) {
1554       b->i[i] = b->i[i-1] + b->imax[i-1];
1555     }
1556     b->freedata     = PETSC_TRUE;
1557   } else {
1558     b->freedata     = PETSC_FALSE;
1559   }
1560 
1561   B->rmap.bs               = bs;
1562   b->bs2              = bs2;
1563   b->nz             = 0;
1564   b->maxnz          = nz*bs2;
1565 
1566   b->inew             = 0;
1567   b->jnew             = 0;
1568   b->anew             = 0;
1569   b->a2anew           = 0;
1570   b->permute          = PETSC_FALSE;
1571   PetscFunctionReturn(0);
1572 }
1573 EXTERN_C_END
1574 
1575 EXTERN_C_BEGIN
1576 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
1577 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*);
1578 EXTERN_C_END
1579 
1580 /*MC
1581   MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1582   based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1583 
1584   Options Database Keys:
1585   . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1586 
1587   Level: beginner
1588 
1589   .seealso: MatCreateSeqSBAIJ
1590 M*/
1591 
1592 EXTERN_C_BEGIN
1593 #undef __FUNCT__
1594 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1595 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B)
1596 {
1597   Mat_SeqSBAIJ   *b;
1598   PetscErrorCode ierr;
1599   PetscMPIInt    size;
1600   PetscTruth     flg;
1601 
1602   PetscFunctionBegin;
1603   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1604   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1605 
1606   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1607   B->data = (void*)b;
1608   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1609   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1610   B->ops->view        = MatView_SeqSBAIJ;
1611   B->factor           = 0;
1612   B->mapping          = 0;
1613   b->row              = 0;
1614   b->icol             = 0;
1615   b->reallocs         = 0;
1616   b->saved_values     = 0;
1617 
1618 
1619   b->sorted           = PETSC_FALSE;
1620   b->roworiented      = PETSC_TRUE;
1621   b->nonew            = 0;
1622   b->diag             = 0;
1623   b->solve_work       = 0;
1624   b->mult_work        = 0;
1625   B->spptr            = 0;
1626   b->keepzeroedrows   = PETSC_FALSE;
1627   b->xtoy             = 0;
1628   b->XtoY             = 0;
1629 
1630   b->inew             = 0;
1631   b->jnew             = 0;
1632   b->anew             = 0;
1633   b->a2anew           = 0;
1634   b->permute          = PETSC_FALSE;
1635 
1636   b->ignore_ltriangular = PETSC_FALSE;
1637   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr);
1638   if (flg) b->ignore_ltriangular = PETSC_TRUE;
1639 
1640   b->getrow_utriangular = PETSC_FALSE;
1641   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr);
1642   if (flg) b->getrow_utriangular = PETSC_TRUE;
1643 
1644   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1645                                      "MatStoreValues_SeqSBAIJ",
1646                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1647   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1648                                      "MatRetrieveValues_SeqSBAIJ",
1649                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1650   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1651                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1652                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1653   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1654                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1655                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1656   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1657                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1658                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1659   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1660                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1661                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1662 
1663   B->symmetric                  = PETSC_TRUE;
1664   B->structurally_symmetric     = PETSC_TRUE;
1665   B->symmetric_set              = PETSC_TRUE;
1666   B->structurally_symmetric_set = PETSC_TRUE;
1667   PetscFunctionReturn(0);
1668 }
1669 EXTERN_C_END
1670 
1671 #undef __FUNCT__
1672 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1673 /*@C
1674    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1675    compressed row) format.  For good matrix assembly performance the
1676    user should preallocate the matrix storage by setting the parameter nz
1677    (or the array nnz).  By setting these parameters accurately, performance
1678    during matrix assembly can be increased by more than a factor of 50.
1679 
1680    Collective on Mat
1681 
1682    Input Parameters:
1683 +  A - the symmetric matrix
1684 .  bs - size of block
1685 .  nz - number of block nonzeros per block row (same for all rows)
1686 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1687          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1688 
1689    Options Database Keys:
1690 .   -mat_no_unroll - uses code that does not unroll the loops in the
1691                      block calculations (much slower)
1692 .    -mat_block_size - size of the blocks to use
1693 
1694    Level: intermediate
1695 
1696    Notes:
1697    Specify the preallocated storage with either nz or nnz (not both).
1698    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1699    allocation.  For additional details, see the users manual chapter on
1700    matrices.
1701 
1702    If the nnz parameter is given then the nz parameter is ignored
1703 
1704 
1705 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1706 @*/
1707 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
1708 {
1709   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
1710 
1711   PetscFunctionBegin;
1712   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1713   if (f) {
1714     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1715   }
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 #undef __FUNCT__
1720 #define __FUNCT__ "MatCreateSeqSBAIJ"
1721 /*@C
1722    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1723    compressed row) format.  For good matrix assembly performance the
1724    user should preallocate the matrix storage by setting the parameter nz
1725    (or the array nnz).  By setting these parameters accurately, performance
1726    during matrix assembly can be increased by more than a factor of 50.
1727 
1728    Collective on MPI_Comm
1729 
1730    Input Parameters:
1731 +  comm - MPI communicator, set to PETSC_COMM_SELF
1732 .  bs - size of block
1733 .  m - number of rows, or number of columns
1734 .  nz - number of block nonzeros per block row (same for all rows)
1735 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1736          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1737 
1738    Output Parameter:
1739 .  A - the symmetric matrix
1740 
1741    Options Database Keys:
1742 .   -mat_no_unroll - uses code that does not unroll the loops in the
1743                      block calculations (much slower)
1744 .    -mat_block_size - size of the blocks to use
1745 
1746    Level: intermediate
1747 
1748    Notes:
1749 
1750    Specify the preallocated storage with either nz or nnz (not both).
1751    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1752    allocation.  For additional details, see the users manual chapter on
1753    matrices.
1754 
1755    If the nnz parameter is given then the nz parameter is ignored
1756 
1757 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1758 @*/
1759 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1760 {
1761   PetscErrorCode ierr;
1762 
1763   PetscFunctionBegin;
1764   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1765   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1766   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1767   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
1768   PetscFunctionReturn(0);
1769 }
1770 
1771 #undef __FUNCT__
1772 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1773 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1774 {
1775   Mat            C;
1776   Mat_SeqSBAIJ   *c,*a = (Mat_SeqSBAIJ*)A->data;
1777   PetscErrorCode ierr;
1778   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1779 
1780   PetscFunctionBegin;
1781   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1782 
1783   *B = 0;
1784   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1785   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
1786   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1787   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1788   c    = (Mat_SeqSBAIJ*)C->data;
1789 
1790   C->preallocated   = PETSC_TRUE;
1791   C->factor         = A->factor;
1792   c->row            = 0;
1793   c->icol           = 0;
1794   c->saved_values   = 0;
1795   c->keepzeroedrows = a->keepzeroedrows;
1796   C->assembled      = PETSC_TRUE;
1797 
1798   ierr = PetscMapCopy(A->comm,&A->rmap,&C->rmap);CHKERRQ(ierr);
1799   ierr = PetscMapCopy(A->comm,&A->cmap,&C->cmap);CHKERRQ(ierr);
1800   c->bs2  = a->bs2;
1801   c->mbs  = a->mbs;
1802   c->nbs  = a->nbs;
1803 
1804   ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr);
1805   for (i=0; i<mbs; i++) {
1806     c->imax[i] = a->imax[i];
1807     c->ilen[i] = a->ilen[i];
1808   }
1809 
1810   /* allocate the matrix space */
1811   ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
1812   c->singlemalloc = PETSC_TRUE;
1813   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1814   ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr);
1815   if (mbs > 0) {
1816     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1817     if (cpvalues == MAT_COPY_VALUES) {
1818       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1819     } else {
1820       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1821     }
1822   }
1823 
1824   c->sorted      = a->sorted;
1825   c->roworiented = a->roworiented;
1826   c->nonew       = a->nonew;
1827 
1828   if (a->diag) {
1829     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
1830     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1831     for (i=0; i<mbs; i++) {
1832       c->diag[i] = a->diag[i];
1833     }
1834   } else c->diag  = 0;
1835   c->nz           = a->nz;
1836   c->maxnz        = a->maxnz;
1837   c->solve_work   = 0;
1838   c->mult_work    = 0;
1839   c->freedata     = PETSC_TRUE;
1840   *B = C;
1841   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1842   PetscFunctionReturn(0);
1843 }
1844 
1845 #undef __FUNCT__
1846 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1847 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A)
1848 {
1849   Mat_SeqSBAIJ   *a;
1850   Mat            B;
1851   PetscErrorCode ierr;
1852   int            fd;
1853   PetscMPIInt    size;
1854   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
1855   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1856   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
1857   PetscInt       *masked,nmask,tmp,bs2,ishift;
1858   PetscScalar    *aa;
1859   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1860 
1861   PetscFunctionBegin;
1862   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1863   bs2  = bs*bs;
1864 
1865   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1866   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1867   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1868   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1869   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1870   M = header[1]; N = header[2]; nz = header[3];
1871 
1872   if (header[3] < 0) {
1873     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1874   }
1875 
1876   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1877 
1878   /*
1879      This code adds extra rows to make sure the number of rows is
1880     divisible by the blocksize
1881   */
1882   mbs        = M/bs;
1883   extra_rows = bs - M + bs*(mbs);
1884   if (extra_rows == bs) extra_rows = 0;
1885   else                  mbs++;
1886   if (extra_rows) {
1887     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
1888   }
1889 
1890   /* read in row lengths */
1891   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1892   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1893   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1894 
1895   /* read in column indices */
1896   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1897   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1898   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1899 
1900   /* loop over row lengths determining block row lengths */
1901   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr);
1902   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1903   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
1904   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1905   masked   = mask + mbs;
1906   rowcount = 0; nzcount = 0;
1907   for (i=0; i<mbs; i++) {
1908     nmask = 0;
1909     for (j=0; j<bs; j++) {
1910       kmax = rowlengths[rowcount];
1911       for (k=0; k<kmax; k++) {
1912         tmp = jj[nzcount++]/bs;   /* block col. index */
1913         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1914       }
1915       rowcount++;
1916     }
1917     s_browlengths[i] += nmask;
1918 
1919     /* zero out the mask elements we set */
1920     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1921   }
1922 
1923   /* create our matrix */
1924   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
1925   ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
1926   ierr = MatSetType(B,type);CHKERRQ(ierr);
1927   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr);
1928   a = (Mat_SeqSBAIJ*)B->data;
1929 
1930   /* set matrix "i" values */
1931   a->i[0] = 0;
1932   for (i=1; i<= mbs; i++) {
1933     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1934     a->ilen[i-1] = s_browlengths[i-1];
1935   }
1936   a->nz = a->i[mbs];
1937 
1938   /* read in nonzero values */
1939   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1940   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1941   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1942 
1943   /* set "a" and "j" values into matrix */
1944   nzcount = 0; jcount = 0;
1945   for (i=0; i<mbs; i++) {
1946     nzcountb = nzcount;
1947     nmask    = 0;
1948     for (j=0; j<bs; j++) {
1949       kmax = rowlengths[i*bs+j];
1950       for (k=0; k<kmax; k++) {
1951         tmp = jj[nzcount++]/bs; /* block col. index */
1952         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1953       }
1954     }
1955     /* sort the masked values */
1956     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1957 
1958     /* set "j" values into matrix */
1959     maskcount = 1;
1960     for (j=0; j<nmask; j++) {
1961       a->j[jcount++]  = masked[j];
1962       mask[masked[j]] = maskcount++;
1963     }
1964 
1965     /* set "a" values into matrix */
1966     ishift = bs2*a->i[i];
1967     for (j=0; j<bs; j++) {
1968       kmax = rowlengths[i*bs+j];
1969       for (k=0; k<kmax; k++) {
1970         tmp       = jj[nzcountb]/bs ; /* block col. index */
1971         if (tmp >= i){
1972           block     = mask[tmp] - 1;
1973           point     = jj[nzcountb] - bs*tmp;
1974           idx       = ishift + bs2*block + j + bs*point;
1975           a->a[idx] = aa[nzcountb];
1976         }
1977         nzcountb++;
1978       }
1979     }
1980     /* zero out the mask elements we set */
1981     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1982   }
1983   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1984 
1985   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1986   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1987   ierr = PetscFree(aa);CHKERRQ(ierr);
1988   ierr = PetscFree(jj);CHKERRQ(ierr);
1989   ierr = PetscFree(mask);CHKERRQ(ierr);
1990 
1991   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1992   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1993   ierr = MatView_Private(B);CHKERRQ(ierr);
1994   *A = B;
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatRelax_SeqSBAIJ"
2000 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2001 {
2002   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
2003   MatScalar      *aa=a->a,*v,*v1;
2004   PetscScalar    *x,*b,*t,sum,d;
2005   PetscErrorCode ierr;
2006   PetscInt       m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j;
2007   PetscInt       nz,nz1,*vj,*vj1,i;
2008 
2009   PetscFunctionBegin;
2010   its = its*lits;
2011   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2012 
2013   if (bs > 1)
2014     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2015 
2016   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2017   if (xx != bb) {
2018     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2019   } else {
2020     b = x;
2021   }
2022 
2023   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
2024 
2025   if (flag & SOR_ZERO_INITIAL_GUESS) {
2026     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2027       for (i=0; i<m; i++)
2028         t[i] = b[i];
2029 
2030       for (i=0; i<m; i++){
2031         d  = *(aa + ai[i]);  /* diag[i] */
2032         v  = aa + ai[i] + 1;
2033         vj = aj + ai[i] + 1;
2034         nz = ai[i+1] - ai[i] - 1;
2035         x[i] = omega*t[i]/d;
2036         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2037         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2038       }
2039     }
2040 
2041     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2042       for (i=0; i<m; i++)
2043         t[i] = b[i];
2044 
2045       for (i=0; i<m-1; i++){  /* update rhs */
2046         v  = aa + ai[i] + 1;
2047         vj = aj + ai[i] + 1;
2048         nz = ai[i+1] - ai[i] - 1;
2049         while (nz--) t[*vj++] -= x[i]*(*v++);
2050         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2051       }
2052       for (i=m-1; i>=0; i--){
2053         d  = *(aa + ai[i]);
2054         v  = aa + ai[i] + 1;
2055         vj = aj + ai[i] + 1;
2056         nz = ai[i+1] - ai[i] - 1;
2057         sum = t[i];
2058         while (nz--) sum -= x[*vj++]*(*v++);
2059         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2060         x[i] =   (1-omega)*x[i] + omega*sum/d;
2061       }
2062     }
2063     its--;
2064   }
2065 
2066   while (its--) {
2067     /*
2068        forward sweep:
2069        for i=0,...,m-1:
2070          sum[i] = (b[i] - U(i,:)x )/d[i];
2071          x[i]   = (1-omega)x[i] + omega*sum[i];
2072          b      = b - x[i]*U^T(i,:);
2073 
2074     */
2075     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2076       for (i=0; i<m; i++)
2077         t[i] = b[i];
2078 
2079       for (i=0; i<m; i++){
2080         d  = *(aa + ai[i]);  /* diag[i] */
2081         v  = aa + ai[i] + 1; v1=v;
2082         vj = aj + ai[i] + 1; vj1=vj;
2083         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2084         sum = t[i];
2085         while (nz1--) sum -= (*v1++)*x[*vj1++];
2086         x[i] = (1-omega)*x[i] + omega*sum/d;
2087         while (nz--) t[*vj++] -= x[i]*(*v++);
2088         ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr);
2089       }
2090     }
2091 
2092   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2093       /*
2094        backward sweep:
2095        b = b - x[i]*U^T(i,:), i=0,...,n-2
2096        for i=m-1,...,0:
2097          sum[i] = (b[i] - U(i,:)x )/d[i];
2098          x[i]   = (1-omega)x[i] + omega*sum[i];
2099       */
2100       for (i=0; i<m; i++)
2101         t[i] = b[i];
2102 
2103       for (i=0; i<m-1; i++){  /* update rhs */
2104         v  = aa + ai[i] + 1;
2105         vj = aj + ai[i] + 1;
2106         nz = ai[i+1] - ai[i] - 1;
2107         while (nz--) t[*vj++] -= x[i]*(*v++);
2108         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2109       }
2110       for (i=m-1; i>=0; i--){
2111         d  = *(aa + ai[i]);
2112         v  = aa + ai[i] + 1;
2113         vj = aj + ai[i] + 1;
2114         nz = ai[i+1] - ai[i] - 1;
2115         sum = t[i];
2116         while (nz--) sum -= x[*vj++]*(*v++);
2117         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2118         x[i] =   (1-omega)*x[i] + omega*sum/d;
2119       }
2120     }
2121   }
2122 
2123   ierr = PetscFree(t);CHKERRQ(ierr);
2124   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2125   if (bb != xx) {
2126     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2127   }
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 #undef __FUNCT__
2132 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays"
2133 /*@
2134      MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements
2135               (upper triangular entries in CSR format) provided by the user.
2136 
2137      Collective on MPI_Comm
2138 
2139    Input Parameters:
2140 +  comm - must be an MPI communicator of size 1
2141 .  bs - size of block
2142 .  m - number of rows
2143 .  n - number of columns
2144 .  i - row indices
2145 .  j - column indices
2146 -  a - matrix values
2147 
2148    Output Parameter:
2149 .  mat - the matrix
2150 
2151    Level: intermediate
2152 
2153    Notes:
2154        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2155     once the matrix is destroyed
2156 
2157        You cannot set new nonzero locations into this matrix, that will generate an error.
2158 
2159        The i and j indices are 0 based
2160 
2161 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ()
2162 
2163 @*/
2164 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2165 {
2166   PetscErrorCode ierr;
2167   PetscInt       ii;
2168   Mat_SeqSBAIJ   *sbaij;
2169 
2170   PetscFunctionBegin;
2171   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2172   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2173 
2174   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2175   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2176   ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr);
2177   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2178   sbaij = (Mat_SeqSBAIJ*)(*mat)->data;
2179   ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr);
2180 
2181   sbaij->i = i;
2182   sbaij->j = j;
2183   sbaij->a = a;
2184   sbaij->singlemalloc = PETSC_FALSE;
2185   sbaij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2186   sbaij->freedata     = PETSC_FALSE;
2187 
2188   for (ii=0; ii<m; ii++) {
2189     sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii];
2190 #if defined(PETSC_USE_DEBUG)
2191     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2192 #endif
2193   }
2194 #if defined(PETSC_USE_DEBUG)
2195   for (ii=0; ii<sbaij->i[m]; ii++) {
2196     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2197     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2198   }
2199 #endif
2200 
2201   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2202   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2203   PetscFunctionReturn(0);
2204 }
2205 
2206 
2207 
2208 
2209 
2210