xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision 9a09c71274829a1744640f51900178f70a1c727f)
170c3da92SBarry Smith 
2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
3525d23c0SHong Zhang #include <../src/mat/impls/baij/seq/baij.h>
4525d23c0SHong Zhang 
5b582cc96SHong Zhang #undef __FUNCT__
6b582cc96SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqBAIJ"
7b582cc96SHong Zhang PetscErrorCode  MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
8b582cc96SHong Zhang {
9b582cc96SHong Zhang   PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
10b582cc96SHong Zhang   PetscErrorCode ierr;
11*9a09c712SHong Zhang   PetscInt       bs=J->rmap->bs,i,j,k,l,row,col,N=J->cmap->n,spidx,nz;
12b7799381SHong Zhang   PetscScalar    dx,*dy_i,*xx,*w3_array,*dy=coloring->dy;
13b582cc96SHong Zhang   PetscScalar    *vscale_array;
14b582cc96SHong Zhang   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
15b582cc96SHong Zhang   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
16b582cc96SHong Zhang   void           *fctx   = coloring->fctx;
17b582cc96SHong Zhang   PetscBool      flg     = PETSC_FALSE;
18*9a09c712SHong Zhang   PetscScalar    *valaddr;
19*9a09c712SHong Zhang   MatEntry       *Jentry=coloring->matentry;
20b582cc96SHong Zhang 
21b582cc96SHong Zhang   PetscFunctionBegin;
22b582cc96SHong Zhang   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
23b582cc96SHong Zhang   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
24b582cc96SHong Zhang   if (flg) {
25b582cc96SHong Zhang     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
26b582cc96SHong Zhang   } else {
27b582cc96SHong Zhang     PetscBool assembled;
28b582cc96SHong Zhang     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
29b582cc96SHong Zhang     if (assembled) {
30b582cc96SHong Zhang       ierr = MatZeroEntries(J);CHKERRQ(ierr);
31b582cc96SHong Zhang     }
32b582cc96SHong Zhang   }
33b582cc96SHong Zhang   if (!coloring->vscale) {
34b582cc96SHong Zhang     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
35b582cc96SHong Zhang   }
36b582cc96SHong Zhang 
37b582cc96SHong Zhang   /* Set w1 = F(x1) */
38b582cc96SHong Zhang   if (!coloring->fset) {
39b582cc96SHong Zhang     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
40b582cc96SHong Zhang     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
41b582cc96SHong Zhang     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
42b582cc96SHong Zhang   } else {
43b582cc96SHong Zhang     coloring->fset = PETSC_FALSE;
44b582cc96SHong Zhang   }
45b582cc96SHong Zhang 
46b582cc96SHong Zhang   if (!coloring->w3) {
47b582cc96SHong Zhang     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
48b582cc96SHong Zhang     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
49b582cc96SHong Zhang   }
50b582cc96SHong Zhang   w3 = coloring->w3;
51b582cc96SHong Zhang 
52b7799381SHong Zhang   /* Compute local scale factors: vscale = dx */
53b582cc96SHong Zhang   if (coloring->htype[0] == 'w') {
54b7799381SHong Zhang     /* tacky test; need to make systematic if we add other approaches to computing h*/
55b7799381SHong Zhang     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
56b7799381SHong Zhang     dx = (1.0 + unorm)*epsilon;
57b7799381SHong Zhang     ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr);
58b582cc96SHong Zhang   } else {
59b7799381SHong Zhang     ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
60b7799381SHong Zhang     ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
61b7799381SHong Zhang     for (col=0; col<N; col++) {
62b582cc96SHong Zhang       dx = xx[col];
63b7799381SHong Zhang       if (dx == 0.0) dx = 1.0;
64b582cc96SHong Zhang       if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
65b582cc96SHong Zhang       else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
66b582cc96SHong Zhang       dx               *= epsilon;
67b7799381SHong Zhang       vscale_array[col] = dx;
68b582cc96SHong Zhang     }
69b7799381SHong Zhang     ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
70b7799381SHong Zhang     ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
7187d34202SHong Zhang   }
7287d34202SHong Zhang 
73b582cc96SHong Zhang   nz = 0;
74b7799381SHong Zhang   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
75b582cc96SHong Zhang   for (k=0; k<coloring->ncolors; k++) { /*  Loop over each color */
76b582cc96SHong Zhang     coloring->currentcolor = k;
7787d34202SHong Zhang 
7887d34202SHong Zhang     /* Compute w3 = x1 + dx */
79b582cc96SHong Zhang     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
80b7799381SHong Zhang     dy_i = dy;
81b7799381SHong Zhang     for (i=0; i<bs; i++) {              /* Loop over a block of columns */
82b582cc96SHong Zhang       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
83b582cc96SHong Zhang       for (l=0; l<coloring->ncolumns[k]; l++) {
84b582cc96SHong Zhang         col            = i + bs*coloring->columns[k][l];
85b582cc96SHong Zhang         w3_array[col] += vscale_array[col];
86b7799381SHong Zhang         if (i) {
87b7799381SHong Zhang           w3_array[col-1] -= vscale_array[col-1]; /* resume original w3[col-1] */
88b7799381SHong Zhang         }
89b582cc96SHong Zhang       }
90b582cc96SHong Zhang       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
91b582cc96SHong Zhang 
92b7799381SHong Zhang       /* Evaluate function w2 = F(w3) - F(x1) */
93b582cc96SHong Zhang       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
94b7799381SHong Zhang       ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */
95b7799381SHong Zhang       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
96b582cc96SHong Zhang       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
97b7799381SHong Zhang       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
98b7799381SHong Zhang       ierr = VecResetArray(w2);CHKERRQ(ierr);
99b7799381SHong Zhang       dy_i += N; /* points to dy+i*N */
10087d34202SHong Zhang     }
101b582cc96SHong Zhang 
102b7799381SHong Zhang     /* Loop over row blocks, putting dy/dx into Jacobian matrix */
103b582cc96SHong Zhang     for (l=0; l<coloring->nrows[k]; l++) {
104*9a09c712SHong Zhang       row     = bs*Jentry[nz].row;
105*9a09c712SHong Zhang       col     = bs*Jentry[nz].col;
106*9a09c712SHong Zhang       valaddr = Jentry[nz].valaddr;
107*9a09c712SHong Zhang       nz++;
10826cb46bfSHong Zhang       spidx = 0;
109b7799381SHong Zhang       dy_i  = dy;
11026cb46bfSHong Zhang       for (i=0; i<bs; i++) {   /* column of the block */
11126cb46bfSHong Zhang         for (j=0; j<bs; j++) { /* row of the block */
112*9a09c712SHong Zhang           valaddr[spidx++] = dy_i[row+j]/vscale_array[col+i];
113b582cc96SHong Zhang         }
114b7799381SHong Zhang         dy_i += N; /* points to dy+i*N */
11526cb46bfSHong Zhang       }
11626cb46bfSHong Zhang     }
117b582cc96SHong Zhang   } /* endof for each color */
118b582cc96SHong Zhang   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
119b7799381SHong Zhang 
120b582cc96SHong Zhang   coloring->currentcolor = -1;
121b582cc96SHong Zhang   PetscFunctionReturn(0);
122b582cc96SHong Zhang }
123b582cc96SHong Zhang 
124525d23c0SHong Zhang /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */
125525d23c0SHong Zhang #undef __FUNCT__
126525d23c0SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqAIJ"
127525d23c0SHong Zhang PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
128525d23c0SHong Zhang {
129525d23c0SHong Zhang   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
130525d23c0SHong Zhang   PetscErrorCode ierr;
1310944192fSHong Zhang   PetscInt       k,l,row,col,N;
132525d23c0SHong Zhang   PetscScalar    dx,*y,*xx,*w3_array;
133525d23c0SHong Zhang   PetscScalar    *vscale_array;
134525d23c0SHong Zhang   PetscReal      epsilon=coloring->error_rel,umin = coloring->umin,unorm;
135525d23c0SHong Zhang   Vec            w1=coloring->w1,w2=coloring->w2,w3;
136525d23c0SHong Zhang   void           *fctx=coloring->fctx;
1370944192fSHong Zhang   PetscBool      flg=PETSC_FALSE;
138525d23c0SHong Zhang   const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
139*9a09c712SHong Zhang   PetscInt       **columns=coloring->columns,ncolumns_k,nrows_k,nz;
140*9a09c712SHong Zhang   MatEntry       *Jentry=coloring->matentry;
141525d23c0SHong Zhang 
142525d23c0SHong Zhang   PetscFunctionBegin;
143525d23c0SHong Zhang   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
144525d23c0SHong Zhang   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
145525d23c0SHong Zhang   if (flg) {
146525d23c0SHong Zhang     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
147525d23c0SHong Zhang   } else {
148525d23c0SHong Zhang     PetscBool assembled;
149525d23c0SHong Zhang     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
150525d23c0SHong Zhang     if (assembled) {
151525d23c0SHong Zhang       ierr = MatZeroEntries(J);CHKERRQ(ierr);
152525d23c0SHong Zhang     }
153525d23c0SHong Zhang   }
154525d23c0SHong Zhang   if (!coloring->vscale) {
155525d23c0SHong Zhang     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
156525d23c0SHong Zhang   }
157525d23c0SHong Zhang 
158525d23c0SHong Zhang   /* Set w1 = F(x1) */
159525d23c0SHong Zhang   if (!coloring->fset) {
160525d23c0SHong Zhang     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
161525d23c0SHong Zhang     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
162525d23c0SHong Zhang     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
163525d23c0SHong Zhang   } else {
164525d23c0SHong Zhang     coloring->fset = PETSC_FALSE;
165525d23c0SHong Zhang   }
166525d23c0SHong Zhang 
167525d23c0SHong Zhang   if (!coloring->w3) {
168525d23c0SHong Zhang     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
169525d23c0SHong Zhang     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
170525d23c0SHong Zhang   }
171525d23c0SHong Zhang   w3 = coloring->w3;
172525d23c0SHong Zhang 
1730944192fSHong Zhang   /* Compute scale factors: vscale = dx */
1740944192fSHong Zhang   if (coloring->htype[0] == 'w') {
1750944192fSHong Zhang     /* tacky test; need to make systematic if we add other approaches to computing h*/
1760944192fSHong Zhang     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
1770944192fSHong Zhang     dx = (1.0 + unorm)*epsilon;
1780944192fSHong Zhang     ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr);
1790944192fSHong Zhang   } else {
180525d23c0SHong Zhang     ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
181525d23c0SHong Zhang     ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
182525d23c0SHong Zhang     ierr = VecGetSize(x1,&N);CHKERRQ(ierr);
183525d23c0SHong Zhang     for (col=0; col<N; col++) {
184525d23c0SHong Zhang       dx = xx[col];
1850944192fSHong Zhang       if (dx == 0.0) dx = 1.0;
186525d23c0SHong Zhang       if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
187525d23c0SHong Zhang       else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
188525d23c0SHong Zhang       dx               *= epsilon;
1890944192fSHong Zhang       vscale_array[col] = dx;
190525d23c0SHong Zhang     }
1910944192fSHong Zhang     ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
1920944192fSHong Zhang     ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
1930944192fSHong Zhang   }
194525d23c0SHong Zhang 
195525d23c0SHong Zhang   nz  = 0;
1960944192fSHong Zhang   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
197525d23c0SHong Zhang   for (k=0; k<ncolors; k++) { /* loop over colors */
198525d23c0SHong Zhang     coloring->currentcolor = k;
199525d23c0SHong Zhang 
200525d23c0SHong Zhang     /*
201525d23c0SHong Zhang       Loop over each column associated with color
202525d23c0SHong Zhang       adding the perturbation to the vector w3 = x1 + dx.
203525d23c0SHong Zhang     */
204525d23c0SHong Zhang     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
205525d23c0SHong Zhang     ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
206525d23c0SHong Zhang     ncolumns_k = ncolumns[k];
207525d23c0SHong Zhang     for (l=0; l<ncolumns_k; l++) { /* loop over columns */
208525d23c0SHong Zhang       col = columns[k][l];
209525d23c0SHong Zhang       w3_array[col] += vscale_array[col];
210525d23c0SHong Zhang     }
211525d23c0SHong Zhang     ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
212525d23c0SHong Zhang 
213525d23c0SHong Zhang     /*
214525d23c0SHong Zhang       Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
215525d23c0SHong Zhang                            w2 = F(x1 + dx) - F(x1)
216525d23c0SHong Zhang     */
217525d23c0SHong Zhang     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
218525d23c0SHong Zhang     ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
219525d23c0SHong Zhang     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
220525d23c0SHong Zhang     ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
221525d23c0SHong Zhang 
222525d23c0SHong Zhang     /*
223525d23c0SHong Zhang       Loop over rows of vector, putting w2/dx into Jacobian matrix
224525d23c0SHong Zhang     */
225525d23c0SHong Zhang     ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
226525d23c0SHong Zhang     nrows_k = nrows[k];
227*9a09c712SHong Zhang     nrows_k = nrows[k];
228525d23c0SHong Zhang     for (l=0; l<nrows_k; l++) { /* loop over rows */
229*9a09c712SHong Zhang       row                     = Jentry[nz].row;
230*9a09c712SHong Zhang       col                     = Jentry[nz].col;
231*9a09c712SHong Zhang       *(Jentry[nz++].valaddr) = y[row]/vscale_array[col];
232525d23c0SHong Zhang     }
233525d23c0SHong Zhang     ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
234525d23c0SHong Zhang   } /* endof for each color */
235525d23c0SHong Zhang   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
236525d23c0SHong Zhang 
237525d23c0SHong Zhang   coloring->currentcolor = -1;
238525d23c0SHong Zhang   PetscFunctionReturn(0);
239525d23c0SHong Zhang }
240639f9d9dSBarry Smith 
24179c1e64dSHong Zhang #undef __FUNCT__
24279c1e64dSHong Zhang #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp"
24379c1e64dSHong Zhang /*
24479c1e64dSHong Zhang     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
24579c1e64dSHong Zhang */
24679c1e64dSHong Zhang PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c)
24779c1e64dSHong Zhang {
24879c1e64dSHong Zhang   PetscErrorCode ierr;
24979c1e64dSHong Zhang   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
25079c1e64dSHong Zhang   const PetscInt *is,*rows,*ci,*cj;
251*9a09c712SHong Zhang   PetscInt       nis=iscoloring->n,*rowhit,*columnsforrow,bs,bs2,*spidx,*spidxhit,nz;
25279c1e64dSHong Zhang   IS             *isa;
253525d23c0SHong Zhang   PetscBool      isBAIJ;
25479c1e64dSHong Zhang   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
255*9a09c712SHong Zhang   PetscScalar    *mat_val=csp->a;
256*9a09c712SHong Zhang   PetscScalar    **valaddrhit;
257*9a09c712SHong Zhang   MatEntry       *Jentry;
25879c1e64dSHong Zhang 
25979c1e64dSHong Zhang   PetscFunctionBegin;
26079c1e64dSHong Zhang   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
26152011a10SHong Zhang 
262476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
263476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
26479c1e64dSHong Zhang   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
265525d23c0SHong Zhang   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
266525d23c0SHong Zhang   if (!isBAIJ) {
26752011a10SHong Zhang     bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */
26879c1e64dSHong Zhang   }
26979c1e64dSHong Zhang   N         = mat->cmap->N/bs;
27079c1e64dSHong Zhang   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
27179c1e64dSHong Zhang   c->N      = mat->cmap->N/bs;
27279c1e64dSHong Zhang   c->m      = mat->rmap->N/bs;
27379c1e64dSHong Zhang   c->rstart = 0;
27479c1e64dSHong Zhang 
27579c1e64dSHong Zhang   c->ncolors = nis;
27679c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
27779c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
27879c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
279*9a09c712SHong Zhang 
280*9a09c712SHong Zhang   ierr       = PetscMalloc(csp->nz*sizeof(MatEntry),&Jentry);CHKERRQ(ierr);
281*9a09c712SHong Zhang   ierr       = PetscLogObjectMemory((PetscObject)c,csp->nz*sizeof(MatEntry));CHKERRQ(ierr);
282*9a09c712SHong Zhang   c->matentry = Jentry;
28379c1e64dSHong Zhang 
284525d23c0SHong Zhang   if (isBAIJ) {
285525d23c0SHong Zhang     ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
286525d23c0SHong Zhang   } else {
2876378f32dSHong Zhang     ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
288525d23c0SHong Zhang   }
28979c1e64dSHong Zhang 
290*9a09c712SHong Zhang   ierr = PetscMalloc4(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit,c->m,PetscScalar*,&valaddrhit);CHKERRQ(ierr);
29179c1e64dSHong Zhang   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
29279c1e64dSHong Zhang 
29317a7dee1SHong Zhang   nz = 0;
29479c1e64dSHong Zhang   for (i=0; i<nis; i++) { /* loop over colors */
29579c1e64dSHong Zhang     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
29679c1e64dSHong Zhang     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
29779c1e64dSHong Zhang 
29879c1e64dSHong Zhang     c->ncolumns[i] = n;
29979c1e64dSHong Zhang     if (n) {
30079c1e64dSHong Zhang       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
30179c1e64dSHong Zhang       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
30279c1e64dSHong Zhang     } else {
30379c1e64dSHong Zhang       c->columns[i] = 0;
30479c1e64dSHong Zhang     }
30579c1e64dSHong Zhang 
30679c1e64dSHong Zhang     /* fast, crude version requires O(N*N) work */
307*9a09c712SHong Zhang     bs2 = bs*bs;
308476e0d0aSHong Zhang     nrows = 0;
30979c1e64dSHong Zhang     for (j=0; j<n; j++) {  /* loop over columns */
31079c1e64dSHong Zhang       col  = is[j];
31179c1e64dSHong Zhang       rows = cj + ci[col];
31279c1e64dSHong Zhang       m    = ci[col+1] - ci[col];
31379c1e64dSHong Zhang       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
31479c1e64dSHong Zhang         rowhit[*rows]     = col + 1;
315*9a09c712SHong Zhang         spidxhit[*rows]   = spidx[ci[col] + k];
316*9a09c712SHong Zhang         valaddrhit[*rows] = &mat_val[bs2*spidx[ci[col] + k]];
317*9a09c712SHong Zhang         rows++;
318476e0d0aSHong Zhang         nrows++;
31979c1e64dSHong Zhang       }
32079c1e64dSHong Zhang     }
321476e0d0aSHong Zhang     c->nrows[i] = nrows; /* total num of rows for this color */
32279c1e64dSHong Zhang 
32379c1e64dSHong Zhang     nrows = 0;
32479c1e64dSHong Zhang     for (j=0; j<N; j++) { /* loop over rows */
32579c1e64dSHong Zhang       if (rowhit[j]) {
326*9a09c712SHong Zhang         Jentry[nz].row     = j;              /* local row index */
327*9a09c712SHong Zhang         Jentry[nz].col     = rowhit[j] - 1;  /* local column index */
328*9a09c712SHong Zhang         Jentry[nz].valaddr = valaddrhit[j];  /* address of mat value for this entry */
329*9a09c712SHong Zhang         nz++;
33079c1e64dSHong Zhang         rowhit[j] = 0.0;                          /* zero rowhit for reuse */
33179c1e64dSHong Zhang       }
33279c1e64dSHong Zhang     }
33379c1e64dSHong Zhang     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
33479c1e64dSHong Zhang   }
335*9a09c712SHong Zhang   if (nz != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz);
33685740eacSHong Zhang 
337525d23c0SHong Zhang   if (isBAIJ) {
338525d23c0SHong Zhang     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
339525d23c0SHong Zhang   } else {
3406378f32dSHong Zhang     ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
341525d23c0SHong Zhang   }
342*9a09c712SHong Zhang   ierr = PetscFree4(rowhit,columnsforrow,spidxhit,valaddrhit);CHKERRQ(ierr);
34379c1e64dSHong Zhang   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
344476e0d0aSHong Zhang 
3454737c552SHong Zhang   c->ctype                  = IS_COLORING_GHOSTED;
346b582cc96SHong Zhang   if (isBAIJ) {
347b582cc96SHong Zhang     mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ;
348b7799381SHong Zhang     ierr = PetscMalloc(bs*mat->cmap->N*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr);
349b582cc96SHong Zhang   } else {
35079c1e64dSHong Zhang     mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
351b582cc96SHong Zhang   }
35252011a10SHong Zhang   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
35379c1e64dSHong Zhang   PetscFunctionReturn(0);
35479c1e64dSHong Zhang }
35579c1e64dSHong Zhang 
3564a2ae208SSatish Balay #undef __FUNCT__
3574a2ae208SSatish Balay #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
3583acb8795SBarry Smith /*
3593acb8795SBarry Smith     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
3603acb8795SBarry Smith     This is why it has the ugly code with the MatGetBlockSize()
3613acb8795SBarry Smith */
362dfbe8321SBarry Smith PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
363639f9d9dSBarry Smith {
3646849ba73SBarry Smith   PetscErrorCode ierr;
3651a83f524SJed Brown   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
3661a83f524SJed Brown   const PetscInt *is,*rows,*ci,*cj;
3673acb8795SBarry Smith   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
368b9617806SBarry Smith   IS             *isa;
369ace3abfcSBarry Smith   PetscBool      done,flg = PETSC_FALSE;
370476e0d0aSHong Zhang   PetscBool      flg1;
37179c1e64dSHong Zhang   PetscBool      new_impl=PETSC_FALSE;
372639f9d9dSBarry Smith 
3733a40ed3dSBarry Smith   PetscFunctionBegin;
37479c1e64dSHong Zhang   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
37579c1e64dSHong Zhang   if (new_impl){
37679c1e64dSHong Zhang     ierr =  MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr);
37779c1e64dSHong Zhang     PetscFunctionReturn(0);
37879c1e64dSHong Zhang   }
379e7e72b3dSBarry Smith   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
380522c5e43SBarry Smith 
381b9617806SBarry Smith   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
382476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
383476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
384251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
385476e0d0aSHong Zhang   if (flg1) {
3863acb8795SBarry Smith     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3873acb8795SBarry Smith   }
3883acb8795SBarry Smith 
3893acb8795SBarry Smith   N         = mat->cmap->N/bs;
3903acb8795SBarry Smith   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
3913acb8795SBarry Smith   c->N      = mat->cmap->N/bs;
3923acb8795SBarry Smith   c->m      = mat->rmap->N/bs;
393005c665bSBarry Smith   c->rstart = 0;
394005c665bSBarry Smith 
395639f9d9dSBarry Smith   c->ncolors = nis;
39638baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
39738baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
39838baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
39938baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
40038baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
40143a90d84SBarry Smith 
4023acb8795SBarry Smith   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
403ce94432eSBarry Smith   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
40470c3da92SBarry Smith 
40570c3da92SBarry Smith   /*
406639f9d9dSBarry Smith      Temporary option to allow for debugging/testing
40770c3da92SBarry Smith   */
4080298fd71SBarry Smith   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
40970c3da92SBarry Smith 
41038baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
41138baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
41270c3da92SBarry Smith 
413639f9d9dSBarry Smith   for (i=0; i<nis; i++) {
414b9b97703SBarry Smith     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
415639f9d9dSBarry Smith     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
4162205254eSKarl Rupp 
417639f9d9dSBarry Smith     c->ncolumns[i] = n;
418639f9d9dSBarry Smith     if (n) {
41938baddfdSBarry Smith       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
42038baddfdSBarry Smith       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
42170c3da92SBarry Smith     } else {
422639f9d9dSBarry Smith       c->columns[i] = 0;
42370c3da92SBarry Smith     }
42470c3da92SBarry Smith 
4253a7fca6bSBarry Smith     if (!flg) { /* ------------------------------------------------------------------------------*/
4263a7fca6bSBarry Smith       /* fast, crude version requires O(N*N) work */
42738baddfdSBarry Smith       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
428639f9d9dSBarry Smith       /* loop over columns*/
429639f9d9dSBarry Smith       for (j=0; j<n; j++) {
430639f9d9dSBarry Smith         col  = is[j];
431639f9d9dSBarry Smith         rows = cj + ci[col];
432639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
433639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
434639f9d9dSBarry Smith         for (k=0; k<m; k++) {
435639f9d9dSBarry Smith           rowhit[*rows++] = col + 1;
43670c3da92SBarry Smith         }
43770c3da92SBarry Smith       }
438639f9d9dSBarry Smith       /* count the number of hits */
439639f9d9dSBarry Smith       nrows = 0;
44070c3da92SBarry Smith       for (j=0; j<N; j++) {
441639f9d9dSBarry Smith         if (rowhit[j]) nrows++;
442639f9d9dSBarry Smith       }
443639f9d9dSBarry Smith       c->nrows[i] = nrows;
44438baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
44538baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
446639f9d9dSBarry Smith       nrows       = 0;
447639f9d9dSBarry Smith       for (j=0; j<N; j++) {
448639f9d9dSBarry Smith         if (rowhit[j]) {
449639f9d9dSBarry Smith           c->rows[i][nrows]          = j;
450639f9d9dSBarry Smith           c->columnsforrow[i][nrows] = rowhit[j] - 1;
451639f9d9dSBarry Smith           nrows++;
45270c3da92SBarry Smith         }
45370c3da92SBarry Smith       }
454639f9d9dSBarry Smith     } else {  /*-------------------------------------------------------------------------------*/
4553a7fca6bSBarry Smith       /* slow version, using rowhit as a linked list */
45638baddfdSBarry Smith       PetscInt currentcol,fm,mfm;
457639f9d9dSBarry Smith       rowhit[N] = N;
458639f9d9dSBarry Smith       nrows     = 0;
459639f9d9dSBarry Smith       /* loop over columns */
460639f9d9dSBarry Smith       for (j=0; j<n; j++) {
461639f9d9dSBarry Smith         col  = is[j];
462639f9d9dSBarry Smith         rows = cj + ci[col];
463639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
464639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
465639f9d9dSBarry Smith         fm = N;    /* fm points to first entry in linked list */
466639f9d9dSBarry Smith         for (k=0; k<m; k++) {
467639f9d9dSBarry Smith           currentcol = *rows++;
468639f9d9dSBarry Smith           /* is it already in the list? */
469639f9d9dSBarry Smith           do {
470639f9d9dSBarry Smith             mfm = fm;
471639f9d9dSBarry Smith             fm  = rowhit[fm];
472639f9d9dSBarry Smith           } while (fm < currentcol);
473639f9d9dSBarry Smith           /* not in list so add it */
474639f9d9dSBarry Smith           if (fm != currentcol) {
475639f9d9dSBarry Smith             nrows++;
476639f9d9dSBarry Smith             columnsforrow[currentcol] = col;
477639f9d9dSBarry Smith             /* next three lines insert new entry into linked list */
478639f9d9dSBarry Smith             rowhit[mfm]        = currentcol;
479639f9d9dSBarry Smith             rowhit[currentcol] = fm;
480639f9d9dSBarry Smith             fm                 = currentcol;
481639f9d9dSBarry Smith             /* fm points to present position in list since we know the columns are sorted */
48271cd77b2SBarry Smith           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
483639f9d9dSBarry Smith         }
484639f9d9dSBarry Smith       }
485639f9d9dSBarry Smith       c->nrows[i] = nrows;
48638baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
48738baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
488639f9d9dSBarry Smith       /* now store the linked list of rows into c->rows[i] */
489639f9d9dSBarry Smith       nrows = 0;
490639f9d9dSBarry Smith       fm    = rowhit[N];
491639f9d9dSBarry Smith       do {
492639f9d9dSBarry Smith         c->rows[i][nrows]            = fm;
493639f9d9dSBarry Smith         c->columnsforrow[i][nrows++] = columnsforrow[fm];
494639f9d9dSBarry Smith         fm                           = rowhit[fm];
495639f9d9dSBarry Smith       } while (fm < N);
496639f9d9dSBarry Smith     } /* ---------------------------------------------------------------------------------------*/
497639f9d9dSBarry Smith     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
498639f9d9dSBarry Smith   }
4993acb8795SBarry Smith   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
500639f9d9dSBarry Smith 
501606d414cSSatish Balay   ierr = PetscFree(rowhit);CHKERRQ(ierr);
502606d414cSSatish Balay   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
503639f9d9dSBarry Smith 
50430b34957SBarry Smith   /* Optimize by adding the vscale, and scaleforrow[][] fields */
50530b34957SBarry Smith   /*
50630b34957SBarry Smith        see the version for MPIAIJ
50730b34957SBarry Smith   */
508ce94432eSBarry Smith   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
50938baddfdSBarry Smith   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
51030b34957SBarry Smith   for (k=0; k<c->ncolors; k++) {
51138baddfdSBarry Smith     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
51230b34957SBarry Smith     for (l=0; l<c->nrows[k]; l++) {
51330b34957SBarry Smith       col = c->columnsforrow[k][l];
5142205254eSKarl Rupp 
51530b34957SBarry Smith       c->vscaleforrow[k][l] = col;
51630b34957SBarry Smith     }
51730b34957SBarry Smith   }
518b9617806SBarry Smith   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
5193a40ed3dSBarry Smith   PetscFunctionReturn(0);
52070c3da92SBarry Smith }
52179c1e64dSHong Zhang 
522