xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision 0944192f20a7b8bb8de30bd95c415c0bec90b462)
170c3da92SBarry Smith 
2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
3525d23c0SHong Zhang #include <../src/mat/impls/baij/seq/baij.h>
4525d23c0SHong Zhang 
5b582cc96SHong Zhang #undef __FUNCT__
6b582cc96SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqBAIJ"
7b582cc96SHong Zhang PetscErrorCode  MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
8b582cc96SHong Zhang {
9b582cc96SHong Zhang   PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
10b582cc96SHong Zhang   PetscErrorCode ierr;
11b7799381SHong Zhang   PetscInt       bs=J->rmap->bs,bs2=bs*bs,i,j,k,l,row,col,N=J->cmap->n,spidx,nz;
12b7799381SHong Zhang   PetscScalar    dx,*dy_i,*xx,*w3_array,*dy=coloring->dy;
13b582cc96SHong Zhang   PetscScalar    *vscale_array;
14b582cc96SHong Zhang   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
15b582cc96SHong Zhang   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
16b582cc96SHong Zhang   void           *fctx   = coloring->fctx;
17b582cc96SHong Zhang   PetscBool      flg     = PETSC_FALSE;
18b582cc96SHong Zhang   Mat_SeqBAIJ    *csp=(Mat_SeqBAIJ*)J->data;
19d6752224SHong Zhang   PetscScalar    *ca = csp->a,*ca_l;
20b582cc96SHong Zhang 
21b582cc96SHong Zhang   PetscFunctionBegin;
22b582cc96SHong Zhang   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
23b582cc96SHong Zhang   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
24b582cc96SHong Zhang   if (flg) {
25b582cc96SHong Zhang     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
26b582cc96SHong Zhang   } else {
27b582cc96SHong Zhang     PetscBool assembled;
28b582cc96SHong Zhang     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
29b582cc96SHong Zhang     if (assembled) {
30b582cc96SHong Zhang       ierr = MatZeroEntries(J);CHKERRQ(ierr);
31b582cc96SHong Zhang     }
32b582cc96SHong Zhang   }
33b582cc96SHong Zhang   if (!coloring->vscale) {
34b582cc96SHong Zhang     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
35b582cc96SHong Zhang   }
36b582cc96SHong Zhang 
37b582cc96SHong Zhang   /* Set w1 = F(x1) */
38b582cc96SHong Zhang   if (!coloring->fset) {
39b582cc96SHong Zhang     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
40b582cc96SHong Zhang     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
41b582cc96SHong Zhang     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
42b582cc96SHong Zhang   } else {
43b582cc96SHong Zhang     coloring->fset = PETSC_FALSE;
44b582cc96SHong Zhang   }
45b582cc96SHong Zhang 
46b582cc96SHong Zhang   if (!coloring->w3) {
47b582cc96SHong Zhang     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
48b582cc96SHong Zhang     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
49b582cc96SHong Zhang   }
50b582cc96SHong Zhang   w3 = coloring->w3;
51b582cc96SHong Zhang 
52b7799381SHong Zhang   /* Compute local scale factors: vscale = dx */
53b582cc96SHong Zhang   if (coloring->htype[0] == 'w') {
54b7799381SHong Zhang     /* tacky test; need to make systematic if we add other approaches to computing h*/
55b7799381SHong Zhang     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
56b7799381SHong Zhang     dx = (1.0 + unorm)*epsilon;
57b7799381SHong Zhang     ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr);
58b582cc96SHong Zhang   } else {
59b7799381SHong Zhang     ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
60b7799381SHong Zhang     ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
61b7799381SHong Zhang     for (col=0; col<N; col++) {
62b582cc96SHong Zhang       dx = xx[col];
63b7799381SHong Zhang       if (dx == 0.0) dx = 1.0;
64b582cc96SHong Zhang       if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
65b582cc96SHong Zhang       else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
66b582cc96SHong Zhang       dx               *= epsilon;
67b7799381SHong Zhang       vscale_array[col] = dx;
68b582cc96SHong Zhang     }
69b7799381SHong Zhang     ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
70b7799381SHong Zhang     ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
7187d34202SHong Zhang   }
7287d34202SHong Zhang 
73b582cc96SHong Zhang   nz = 0;
74b7799381SHong Zhang   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
75b582cc96SHong Zhang   for (k=0; k<coloring->ncolors; k++) { /*  Loop over each color */
76b582cc96SHong Zhang     coloring->currentcolor = k;
7787d34202SHong Zhang 
7887d34202SHong Zhang     /* Compute w3 = x1 + dx */
79b582cc96SHong Zhang     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
80b7799381SHong Zhang     dy_i = dy;
81b7799381SHong Zhang     for (i=0; i<bs; i++) {              /* Loop over a block of columns */
82b582cc96SHong Zhang       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
83b582cc96SHong Zhang       for (l=0; l<coloring->ncolumns[k]; l++) {
84b582cc96SHong Zhang         col            = i + bs*coloring->columns[k][l];
85b582cc96SHong Zhang         w3_array[col] += vscale_array[col];
86b7799381SHong Zhang         if (i) {
87b7799381SHong Zhang           w3_array[col-1] -= vscale_array[col-1]; /* resume original w3[col-1] */
88b7799381SHong Zhang         }
89b582cc96SHong Zhang       }
90b582cc96SHong Zhang       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
91b582cc96SHong Zhang 
92b7799381SHong Zhang       /* Evaluate function w2 = F(w3) - F(x1) */
93b582cc96SHong Zhang       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
94b7799381SHong Zhang       ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */
95b7799381SHong Zhang       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
96b582cc96SHong Zhang       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
97b7799381SHong Zhang       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
98b7799381SHong Zhang       ierr = VecResetArray(w2);CHKERRQ(ierr);
99b7799381SHong Zhang       dy_i += N; /* points to dy+i*N */
10087d34202SHong Zhang     }
101b582cc96SHong Zhang 
102b7799381SHong Zhang     /* Loop over row blocks, putting dy/dx into Jacobian matrix */
103b582cc96SHong Zhang     for (l=0; l<coloring->nrows[k]; l++) {
104b7799381SHong Zhang       row   = bs*coloring->rowcolden2sp3[nz++];
105b7799381SHong Zhang       col   = bs*coloring->rowcolden2sp3[nz++];
106b7799381SHong Zhang       ca_l  = ca + bs2*coloring->rowcolden2sp3[nz++];
10726cb46bfSHong Zhang       spidx = 0;
108b7799381SHong Zhang       dy_i  = dy;
10926cb46bfSHong Zhang       for (i=0; i<bs; i++) {   /* column of the block */
11026cb46bfSHong Zhang         for (j=0; j<bs; j++) { /* row of the block */
111b7799381SHong Zhang           ca_l[spidx++] = dy_i[row+j]/vscale_array[col+i];
112b582cc96SHong Zhang         }
113b7799381SHong Zhang         dy_i += N; /* points to dy+i*N */
11426cb46bfSHong Zhang       }
11526cb46bfSHong Zhang     }
116b582cc96SHong Zhang   } /* endof for each color */
117b582cc96SHong Zhang   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
118b7799381SHong Zhang 
119b582cc96SHong Zhang   coloring->currentcolor = -1;
120b582cc96SHong Zhang   PetscFunctionReturn(0);
121b582cc96SHong Zhang }
122b582cc96SHong Zhang 
123525d23c0SHong Zhang /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */
124525d23c0SHong Zhang #undef __FUNCT__
125525d23c0SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqAIJ"
126525d23c0SHong Zhang PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
127525d23c0SHong Zhang {
128525d23c0SHong Zhang   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
129525d23c0SHong Zhang   PetscErrorCode ierr;
130*0944192fSHong Zhang   PetscInt       k,l,row,col,N;
131525d23c0SHong Zhang   PetscScalar    dx,*y,*xx,*w3_array;
132525d23c0SHong Zhang   PetscScalar    *vscale_array;
133525d23c0SHong Zhang   PetscReal      epsilon=coloring->error_rel,umin = coloring->umin,unorm;
134525d23c0SHong Zhang   Vec            w1=coloring->w1,w2=coloring->w2,w3;
135525d23c0SHong Zhang   void           *fctx=coloring->fctx;
136*0944192fSHong Zhang   PetscBool      flg=PETSC_FALSE;
137*0944192fSHong Zhang   Mat_SeqAIJ     *csp=(Mat_SeqAIJ*)J->data;
138*0944192fSHong Zhang   PetscScalar    *ca=csp->a;
139525d23c0SHong Zhang   const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
140525d23c0SHong Zhang   PetscInt       **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx;
141525d23c0SHong Zhang 
142525d23c0SHong Zhang   PetscFunctionBegin;
143525d23c0SHong Zhang   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
144525d23c0SHong Zhang   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
145525d23c0SHong Zhang   if (flg) {
146525d23c0SHong Zhang     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
147525d23c0SHong Zhang   } else {
148525d23c0SHong Zhang     PetscBool assembled;
149525d23c0SHong Zhang     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
150525d23c0SHong Zhang     if (assembled) {
151525d23c0SHong Zhang       ierr = MatZeroEntries(J);CHKERRQ(ierr);
152525d23c0SHong Zhang     }
153525d23c0SHong Zhang   }
154525d23c0SHong Zhang   if (!coloring->vscale) {
155525d23c0SHong Zhang     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
156525d23c0SHong Zhang   }
157525d23c0SHong Zhang 
158525d23c0SHong Zhang   /* Set w1 = F(x1) */
159525d23c0SHong Zhang   if (!coloring->fset) {
160525d23c0SHong Zhang     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
161525d23c0SHong Zhang     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
162525d23c0SHong Zhang     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
163525d23c0SHong Zhang   } else {
164525d23c0SHong Zhang     coloring->fset = PETSC_FALSE;
165525d23c0SHong Zhang   }
166525d23c0SHong Zhang 
167525d23c0SHong Zhang   if (!coloring->w3) {
168525d23c0SHong Zhang     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
169525d23c0SHong Zhang     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
170525d23c0SHong Zhang   }
171525d23c0SHong Zhang   w3 = coloring->w3;
172525d23c0SHong Zhang 
173*0944192fSHong Zhang   /* Compute scale factors: vscale = dx */
174*0944192fSHong Zhang   if (coloring->htype[0] == 'w') {
175*0944192fSHong Zhang     /* tacky test; need to make systematic if we add other approaches to computing h*/
176*0944192fSHong Zhang     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
177*0944192fSHong Zhang     dx = (1.0 + unorm)*epsilon;
178*0944192fSHong Zhang     ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr);
179*0944192fSHong Zhang   } else {
180525d23c0SHong Zhang     ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
181525d23c0SHong Zhang     ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
182525d23c0SHong Zhang     ierr = VecGetSize(x1,&N);CHKERRQ(ierr);
183525d23c0SHong Zhang     for (col=0; col<N; col++) {
184525d23c0SHong Zhang       dx = xx[col];
185*0944192fSHong Zhang       if (dx == 0.0) dx = 1.0;
186525d23c0SHong Zhang       if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
187525d23c0SHong Zhang       else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
188525d23c0SHong Zhang       dx               *= epsilon;
189*0944192fSHong Zhang       vscale_array[col] = dx;
190525d23c0SHong Zhang     }
191*0944192fSHong Zhang     ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
192*0944192fSHong Zhang     ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
193*0944192fSHong Zhang   }
194525d23c0SHong Zhang 
195525d23c0SHong Zhang   nz  = 0;
196*0944192fSHong Zhang   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
197525d23c0SHong Zhang   for (k=0; k<ncolors; k++) { /* loop over colors */
198525d23c0SHong Zhang     coloring->currentcolor = k;
199525d23c0SHong Zhang 
200525d23c0SHong Zhang     /*
201525d23c0SHong Zhang       Loop over each column associated with color
202525d23c0SHong Zhang       adding the perturbation to the vector w3 = x1 + dx.
203525d23c0SHong Zhang     */
204525d23c0SHong Zhang     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
205525d23c0SHong Zhang     ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
206525d23c0SHong Zhang     ncolumns_k = ncolumns[k];
207525d23c0SHong Zhang     for (l=0; l<ncolumns_k; l++) { /* loop over columns */
208525d23c0SHong Zhang       col = columns[k][l];
209525d23c0SHong Zhang       w3_array[col] += vscale_array[col];
210525d23c0SHong Zhang     }
211525d23c0SHong Zhang     ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
212525d23c0SHong Zhang 
213525d23c0SHong Zhang     /*
214525d23c0SHong Zhang       Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
215525d23c0SHong Zhang                            w2 = F(x1 + dx) - F(x1)
216525d23c0SHong Zhang     */
217525d23c0SHong Zhang     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
218525d23c0SHong Zhang     ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
219525d23c0SHong Zhang     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
220525d23c0SHong Zhang     ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
221525d23c0SHong Zhang 
222525d23c0SHong Zhang     /*
223525d23c0SHong Zhang       Loop over rows of vector, putting w2/dx into Jacobian matrix
224525d23c0SHong Zhang     */
225525d23c0SHong Zhang     ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
226525d23c0SHong Zhang     nrows_k = nrows[k];
227525d23c0SHong Zhang     for (l=0; l<nrows_k; l++) { /* loop over rows */
228525d23c0SHong Zhang       row   = coloring->rowcolden2sp3[nz++];
229525d23c0SHong Zhang       col   = coloring->rowcolden2sp3[nz++];
230525d23c0SHong Zhang       spidx = coloring->rowcolden2sp3[nz++];
231*0944192fSHong Zhang       ca[spidx] = y[row]/vscale_array[col];
232525d23c0SHong Zhang     }
233525d23c0SHong Zhang     ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
234525d23c0SHong Zhang   } /* endof for each color */
235525d23c0SHong Zhang   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
236525d23c0SHong Zhang 
237525d23c0SHong Zhang   coloring->currentcolor = -1;
238525d23c0SHong Zhang   PetscFunctionReturn(0);
239525d23c0SHong Zhang }
240639f9d9dSBarry Smith 
24179c1e64dSHong Zhang #undef __FUNCT__
24279c1e64dSHong Zhang #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp"
24379c1e64dSHong Zhang /*
24479c1e64dSHong Zhang     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
24579c1e64dSHong Zhang */
24679c1e64dSHong Zhang PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c)
24779c1e64dSHong Zhang {
24879c1e64dSHong Zhang   PetscErrorCode ierr;
24979c1e64dSHong Zhang   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
25079c1e64dSHong Zhang   const PetscInt *is,*rows,*ci,*cj;
25152011a10SHong Zhang   PetscInt       nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz;
25279c1e64dSHong Zhang   IS             *isa;
253525d23c0SHong Zhang   PetscBool      isBAIJ;
25479c1e64dSHong Zhang   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
25579c1e64dSHong Zhang 
25679c1e64dSHong Zhang   PetscFunctionBegin;
25779c1e64dSHong Zhang   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
25852011a10SHong Zhang 
259476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
260476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
26179c1e64dSHong Zhang   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
262525d23c0SHong Zhang   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
263525d23c0SHong Zhang   if (!isBAIJ) {
26452011a10SHong Zhang     bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */
26579c1e64dSHong Zhang   }
26679c1e64dSHong Zhang   N         = mat->cmap->N/bs;
26779c1e64dSHong Zhang   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
26879c1e64dSHong Zhang   c->N      = mat->cmap->N/bs;
26979c1e64dSHong Zhang   c->m      = mat->rmap->N/bs;
27079c1e64dSHong Zhang   c->rstart = 0;
27179c1e64dSHong Zhang 
27279c1e64dSHong Zhang   c->ncolors = nis;
27379c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
27479c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
27579c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
27685740eacSHong Zhang   ierr       = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr);
27779c1e64dSHong Zhang 
278525d23c0SHong Zhang   if (isBAIJ) {
279525d23c0SHong Zhang     ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
280525d23c0SHong Zhang   } else {
2816378f32dSHong Zhang     ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
282525d23c0SHong Zhang   }
28379c1e64dSHong Zhang 
284476e0d0aSHong Zhang   ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr);
28579c1e64dSHong Zhang   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
28679c1e64dSHong Zhang 
28717a7dee1SHong Zhang   nz = 0;
28879c1e64dSHong Zhang   for (i=0; i<nis; i++) { /* loop over colors */
28979c1e64dSHong Zhang     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
29079c1e64dSHong Zhang     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
29179c1e64dSHong Zhang 
29279c1e64dSHong Zhang     c->ncolumns[i] = n;
29379c1e64dSHong Zhang     if (n) {
29479c1e64dSHong Zhang       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
29579c1e64dSHong Zhang       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
29679c1e64dSHong Zhang     } else {
29779c1e64dSHong Zhang       c->columns[i] = 0;
29879c1e64dSHong Zhang     }
29979c1e64dSHong Zhang 
30079c1e64dSHong Zhang     /* fast, crude version requires O(N*N) work */
301476e0d0aSHong Zhang     nrows = 0;
30279c1e64dSHong Zhang     for (j=0; j<n; j++) {  /* loop over columns */
30379c1e64dSHong Zhang       col  = is[j];
30479c1e64dSHong Zhang       rows = cj + ci[col];
30579c1e64dSHong Zhang       m    = ci[col+1] - ci[col];
30679c1e64dSHong Zhang       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
30779c1e64dSHong Zhang         rowhit[*rows]     = col + 1;
308476e0d0aSHong Zhang         spidxhit[*rows++] = spidx[ci[col] + k];
309476e0d0aSHong Zhang         nrows++;
31079c1e64dSHong Zhang       }
31179c1e64dSHong Zhang     }
312476e0d0aSHong Zhang     c->nrows[i] = nrows; /* total num of rows for this color */
31379c1e64dSHong Zhang 
31479c1e64dSHong Zhang     nrows = 0;
31579c1e64dSHong Zhang     for (j=0; j<N; j++) { /* loop over rows */
31679c1e64dSHong Zhang       if (rowhit[j]) {
31717a7dee1SHong Zhang         c->rowcolden2sp3[nz++]   = j;             /* row index */
31817a7dee1SHong Zhang         c->rowcolden2sp3[nz++] = rowhit[j] - 1;   /* column index */
31917a7dee1SHong Zhang         c->rowcolden2sp3[nz++] = spidxhit[j];     /* den2sp index */
32079c1e64dSHong Zhang         rowhit[j] = 0.0;                          /* zero rowhit for reuse */
32179c1e64dSHong Zhang       }
32279c1e64dSHong Zhang     }
32379c1e64dSHong Zhang     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
32479c1e64dSHong Zhang   }
32517a7dee1SHong Zhang   if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz);
32685740eacSHong Zhang 
327525d23c0SHong Zhang   if (isBAIJ) {
328525d23c0SHong Zhang     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
329525d23c0SHong Zhang   } else {
3306378f32dSHong Zhang     ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
331525d23c0SHong Zhang   }
332476e0d0aSHong Zhang   ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr);
33379c1e64dSHong Zhang   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
334476e0d0aSHong Zhang 
3354737c552SHong Zhang   c->ctype                  = IS_COLORING_GHOSTED;
336b582cc96SHong Zhang   if (isBAIJ) {
337b582cc96SHong Zhang     mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ;
338b7799381SHong Zhang     ierr = PetscMalloc(bs*mat->cmap->N*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr);
339b582cc96SHong Zhang   } else {
34079c1e64dSHong Zhang     mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
341b582cc96SHong Zhang   }
34252011a10SHong Zhang   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
34379c1e64dSHong Zhang   PetscFunctionReturn(0);
34479c1e64dSHong Zhang }
34579c1e64dSHong Zhang 
3464a2ae208SSatish Balay #undef __FUNCT__
3474a2ae208SSatish Balay #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
3483acb8795SBarry Smith /*
3493acb8795SBarry Smith     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
3503acb8795SBarry Smith     This is why it has the ugly code with the MatGetBlockSize()
3513acb8795SBarry Smith */
352dfbe8321SBarry Smith PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
353639f9d9dSBarry Smith {
3546849ba73SBarry Smith   PetscErrorCode ierr;
3551a83f524SJed Brown   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
3561a83f524SJed Brown   const PetscInt *is,*rows,*ci,*cj;
3573acb8795SBarry Smith   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
358b9617806SBarry Smith   IS             *isa;
359ace3abfcSBarry Smith   PetscBool      done,flg = PETSC_FALSE;
360476e0d0aSHong Zhang   PetscBool      flg1;
36179c1e64dSHong Zhang   PetscBool      new_impl=PETSC_FALSE;
362639f9d9dSBarry Smith 
3633a40ed3dSBarry Smith   PetscFunctionBegin;
36479c1e64dSHong Zhang   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
36579c1e64dSHong Zhang   if (new_impl){
36679c1e64dSHong Zhang     ierr =  MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr);
36779c1e64dSHong Zhang     PetscFunctionReturn(0);
36879c1e64dSHong Zhang   }
369e7e72b3dSBarry Smith   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
370522c5e43SBarry Smith 
371b9617806SBarry Smith   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
372476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
373476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
374251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
375476e0d0aSHong Zhang   if (flg1) {
3763acb8795SBarry Smith     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3773acb8795SBarry Smith   }
3783acb8795SBarry Smith 
3793acb8795SBarry Smith   N         = mat->cmap->N/bs;
3803acb8795SBarry Smith   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
3813acb8795SBarry Smith   c->N      = mat->cmap->N/bs;
3823acb8795SBarry Smith   c->m      = mat->rmap->N/bs;
383005c665bSBarry Smith   c->rstart = 0;
384005c665bSBarry Smith 
385639f9d9dSBarry Smith   c->ncolors = nis;
38638baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
38738baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
38838baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
38938baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
39038baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
39143a90d84SBarry Smith 
3923acb8795SBarry Smith   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
393ce94432eSBarry Smith   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
39470c3da92SBarry Smith 
39570c3da92SBarry Smith   /*
396639f9d9dSBarry Smith      Temporary option to allow for debugging/testing
39770c3da92SBarry Smith   */
3980298fd71SBarry Smith   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
39970c3da92SBarry Smith 
40038baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
40138baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
40270c3da92SBarry Smith 
403639f9d9dSBarry Smith   for (i=0; i<nis; i++) {
404b9b97703SBarry Smith     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
405639f9d9dSBarry Smith     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
4062205254eSKarl Rupp 
407639f9d9dSBarry Smith     c->ncolumns[i] = n;
408639f9d9dSBarry Smith     if (n) {
40938baddfdSBarry Smith       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
41038baddfdSBarry Smith       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
41170c3da92SBarry Smith     } else {
412639f9d9dSBarry Smith       c->columns[i] = 0;
41370c3da92SBarry Smith     }
41470c3da92SBarry Smith 
4153a7fca6bSBarry Smith     if (!flg) { /* ------------------------------------------------------------------------------*/
4163a7fca6bSBarry Smith       /* fast, crude version requires O(N*N) work */
41738baddfdSBarry Smith       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
418639f9d9dSBarry Smith       /* loop over columns*/
419639f9d9dSBarry Smith       for (j=0; j<n; j++) {
420639f9d9dSBarry Smith         col  = is[j];
421639f9d9dSBarry Smith         rows = cj + ci[col];
422639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
423639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
424639f9d9dSBarry Smith         for (k=0; k<m; k++) {
425639f9d9dSBarry Smith           rowhit[*rows++] = col + 1;
42670c3da92SBarry Smith         }
42770c3da92SBarry Smith       }
428639f9d9dSBarry Smith       /* count the number of hits */
429639f9d9dSBarry Smith       nrows = 0;
43070c3da92SBarry Smith       for (j=0; j<N; j++) {
431639f9d9dSBarry Smith         if (rowhit[j]) nrows++;
432639f9d9dSBarry Smith       }
433639f9d9dSBarry Smith       c->nrows[i] = nrows;
43438baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
43538baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
436639f9d9dSBarry Smith       nrows       = 0;
437639f9d9dSBarry Smith       for (j=0; j<N; j++) {
438639f9d9dSBarry Smith         if (rowhit[j]) {
439639f9d9dSBarry Smith           c->rows[i][nrows]          = j;
440639f9d9dSBarry Smith           c->columnsforrow[i][nrows] = rowhit[j] - 1;
441639f9d9dSBarry Smith           nrows++;
44270c3da92SBarry Smith         }
44370c3da92SBarry Smith       }
444639f9d9dSBarry Smith     } else {  /*-------------------------------------------------------------------------------*/
4453a7fca6bSBarry Smith       /* slow version, using rowhit as a linked list */
44638baddfdSBarry Smith       PetscInt currentcol,fm,mfm;
447639f9d9dSBarry Smith       rowhit[N] = N;
448639f9d9dSBarry Smith       nrows     = 0;
449639f9d9dSBarry Smith       /* loop over columns */
450639f9d9dSBarry Smith       for (j=0; j<n; j++) {
451639f9d9dSBarry Smith         col  = is[j];
452639f9d9dSBarry Smith         rows = cj + ci[col];
453639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
454639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
455639f9d9dSBarry Smith         fm = N;    /* fm points to first entry in linked list */
456639f9d9dSBarry Smith         for (k=0; k<m; k++) {
457639f9d9dSBarry Smith           currentcol = *rows++;
458639f9d9dSBarry Smith           /* is it already in the list? */
459639f9d9dSBarry Smith           do {
460639f9d9dSBarry Smith             mfm = fm;
461639f9d9dSBarry Smith             fm  = rowhit[fm];
462639f9d9dSBarry Smith           } while (fm < currentcol);
463639f9d9dSBarry Smith           /* not in list so add it */
464639f9d9dSBarry Smith           if (fm != currentcol) {
465639f9d9dSBarry Smith             nrows++;
466639f9d9dSBarry Smith             columnsforrow[currentcol] = col;
467639f9d9dSBarry Smith             /* next three lines insert new entry into linked list */
468639f9d9dSBarry Smith             rowhit[mfm]        = currentcol;
469639f9d9dSBarry Smith             rowhit[currentcol] = fm;
470639f9d9dSBarry Smith             fm                 = currentcol;
471639f9d9dSBarry Smith             /* fm points to present position in list since we know the columns are sorted */
47271cd77b2SBarry Smith           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
473639f9d9dSBarry Smith         }
474639f9d9dSBarry Smith       }
475639f9d9dSBarry Smith       c->nrows[i] = nrows;
47638baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
47738baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
478639f9d9dSBarry Smith       /* now store the linked list of rows into c->rows[i] */
479639f9d9dSBarry Smith       nrows = 0;
480639f9d9dSBarry Smith       fm    = rowhit[N];
481639f9d9dSBarry Smith       do {
482639f9d9dSBarry Smith         c->rows[i][nrows]            = fm;
483639f9d9dSBarry Smith         c->columnsforrow[i][nrows++] = columnsforrow[fm];
484639f9d9dSBarry Smith         fm                           = rowhit[fm];
485639f9d9dSBarry Smith       } while (fm < N);
486639f9d9dSBarry Smith     } /* ---------------------------------------------------------------------------------------*/
487639f9d9dSBarry Smith     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
488639f9d9dSBarry Smith   }
4893acb8795SBarry Smith   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
490639f9d9dSBarry Smith 
491606d414cSSatish Balay   ierr = PetscFree(rowhit);CHKERRQ(ierr);
492606d414cSSatish Balay   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
493639f9d9dSBarry Smith 
49430b34957SBarry Smith   /* Optimize by adding the vscale, and scaleforrow[][] fields */
49530b34957SBarry Smith   /*
49630b34957SBarry Smith        see the version for MPIAIJ
49730b34957SBarry Smith   */
498ce94432eSBarry Smith   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
49938baddfdSBarry Smith   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
50030b34957SBarry Smith   for (k=0; k<c->ncolors; k++) {
50138baddfdSBarry Smith     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
50230b34957SBarry Smith     for (l=0; l<c->nrows[k]; l++) {
50330b34957SBarry Smith       col = c->columnsforrow[k][l];
5042205254eSKarl Rupp 
50530b34957SBarry Smith       c->vscaleforrow[k][l] = col;
50630b34957SBarry Smith     }
50730b34957SBarry Smith   }
508b9617806SBarry Smith   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
5093a40ed3dSBarry Smith   PetscFunctionReturn(0);
51070c3da92SBarry Smith }
51179c1e64dSHong Zhang 
512