xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision 4b2e90ca22e5d236f72fef601ed2fa3dbdcf4eae)
170c3da92SBarry Smith 
2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
3525d23c0SHong Zhang #include <../src/mat/impls/baij/seq/baij.h>
4525d23c0SHong Zhang 
5b582cc96SHong Zhang #undef __FUNCT__
6b582cc96SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqBAIJ"
7b582cc96SHong Zhang PetscErrorCode  MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
8b582cc96SHong Zhang {
9b582cc96SHong Zhang   PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
10b582cc96SHong Zhang   PetscErrorCode ierr;
119a09c712SHong Zhang   PetscInt       bs=J->rmap->bs,i,j,k,l,row,col,N=J->cmap->n,spidx,nz;
12b7799381SHong Zhang   PetscScalar    dx,*dy_i,*xx,*w3_array,*dy=coloring->dy;
13b582cc96SHong Zhang   PetscScalar    *vscale_array;
14b582cc96SHong Zhang   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
15b582cc96SHong Zhang   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
16b582cc96SHong Zhang   void           *fctx   = coloring->fctx;
17b582cc96SHong Zhang   PetscBool      flg     = PETSC_FALSE;
189a09c712SHong Zhang   PetscScalar    *valaddr;
199a09c712SHong Zhang   MatEntry       *Jentry=coloring->matentry;
20b582cc96SHong Zhang 
21b582cc96SHong Zhang   PetscFunctionBegin;
22b582cc96SHong Zhang   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
23b582cc96SHong Zhang   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
24b582cc96SHong Zhang   if (flg) {
25b582cc96SHong Zhang     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
26b582cc96SHong Zhang   } else {
27b582cc96SHong Zhang     PetscBool assembled;
28b582cc96SHong Zhang     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
29b582cc96SHong Zhang     if (assembled) {
30b582cc96SHong Zhang       ierr = MatZeroEntries(J);CHKERRQ(ierr);
31b582cc96SHong Zhang     }
32b582cc96SHong Zhang   }
33b582cc96SHong Zhang   if (!coloring->vscale) {
34b582cc96SHong Zhang     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
35b582cc96SHong Zhang   }
36b582cc96SHong Zhang 
37b582cc96SHong Zhang   /* Set w1 = F(x1) */
38b582cc96SHong Zhang   if (!coloring->fset) {
39b582cc96SHong Zhang     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
40b582cc96SHong Zhang     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
41b582cc96SHong Zhang     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
42b582cc96SHong Zhang   } else {
43b582cc96SHong Zhang     coloring->fset = PETSC_FALSE;
44b582cc96SHong Zhang   }
45b582cc96SHong Zhang 
46b582cc96SHong Zhang   if (!coloring->w3) {
47b582cc96SHong Zhang     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
48b582cc96SHong Zhang     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
49b582cc96SHong Zhang   }
50b582cc96SHong Zhang   w3 = coloring->w3;
51b582cc96SHong Zhang 
52b7799381SHong Zhang   /* Compute local scale factors: vscale = dx */
53b582cc96SHong Zhang   if (coloring->htype[0] == 'w') {
54b7799381SHong Zhang     /* tacky test; need to make systematic if we add other approaches to computing h*/
55b7799381SHong Zhang     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
56b7799381SHong Zhang     dx = (1.0 + unorm)*epsilon;
57b7799381SHong Zhang     ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr);
58b582cc96SHong Zhang   } else {
59b7799381SHong Zhang     ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
60b7799381SHong Zhang     ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
61b7799381SHong Zhang     for (col=0; col<N; col++) {
62b582cc96SHong Zhang       dx = xx[col];
63b7799381SHong Zhang       if (dx == 0.0) dx = 1.0;
64b582cc96SHong Zhang       if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
65b582cc96SHong Zhang       else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
66b582cc96SHong Zhang       dx               *= epsilon;
67b7799381SHong Zhang       vscale_array[col] = dx;
68b582cc96SHong Zhang     }
69b7799381SHong Zhang     ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
70b7799381SHong Zhang     ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
7187d34202SHong Zhang   }
7287d34202SHong Zhang 
73b582cc96SHong Zhang   nz = 0;
74b7799381SHong Zhang   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
75b582cc96SHong Zhang   for (k=0; k<coloring->ncolors; k++) { /*  Loop over each color */
76b582cc96SHong Zhang     coloring->currentcolor = k;
7787d34202SHong Zhang 
7887d34202SHong Zhang     /* Compute w3 = x1 + dx */
79b582cc96SHong Zhang     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
80b7799381SHong Zhang     dy_i = dy;
81b7799381SHong Zhang     for (i=0; i<bs; i++) {              /* Loop over a block of columns */
82b582cc96SHong Zhang       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
83b582cc96SHong Zhang       for (l=0; l<coloring->ncolumns[k]; l++) {
84b582cc96SHong Zhang         col            = i + bs*coloring->columns[k][l];
85b582cc96SHong Zhang         w3_array[col] += vscale_array[col];
86b7799381SHong Zhang         if (i) {
87b7799381SHong Zhang           w3_array[col-1] -= vscale_array[col-1]; /* resume original w3[col-1] */
88b7799381SHong Zhang         }
89b582cc96SHong Zhang       }
90b582cc96SHong Zhang       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
91b582cc96SHong Zhang 
92b7799381SHong Zhang       /* Evaluate function w2 = F(w3) - F(x1) */
93b582cc96SHong Zhang       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
94b7799381SHong Zhang       ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */
95b7799381SHong Zhang       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
96b582cc96SHong Zhang       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
97b7799381SHong Zhang       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
98b7799381SHong Zhang       ierr = VecResetArray(w2);CHKERRQ(ierr);
99b7799381SHong Zhang       dy_i += N; /* points to dy+i*N */
10087d34202SHong Zhang     }
101b582cc96SHong Zhang 
102b7799381SHong Zhang     /* Loop over row blocks, putting dy/dx into Jacobian matrix */
103b582cc96SHong Zhang     for (l=0; l<coloring->nrows[k]; l++) {
1049a09c712SHong Zhang       row     = bs*Jentry[nz].row;
1059a09c712SHong Zhang       col     = bs*Jentry[nz].col;
1069a09c712SHong Zhang       valaddr = Jentry[nz].valaddr;
1079a09c712SHong Zhang       nz++;
10826cb46bfSHong Zhang       spidx = 0;
109b7799381SHong Zhang       dy_i  = dy;
11026cb46bfSHong Zhang       for (i=0; i<bs; i++) {   /* column of the block */
11126cb46bfSHong Zhang         for (j=0; j<bs; j++) { /* row of the block */
1129a09c712SHong Zhang           valaddr[spidx++] = dy_i[row+j]/vscale_array[col+i];
113b582cc96SHong Zhang         }
114b7799381SHong Zhang         dy_i += N; /* points to dy+i*N */
11526cb46bfSHong Zhang       }
11626cb46bfSHong Zhang     }
117b582cc96SHong Zhang   } /* endof for each color */
118b582cc96SHong Zhang   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
119b7799381SHong Zhang 
120b582cc96SHong Zhang   coloring->currentcolor = -1;
121b582cc96SHong Zhang   PetscFunctionReturn(0);
122b582cc96SHong Zhang }
123b582cc96SHong Zhang 
124525d23c0SHong Zhang /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */
125525d23c0SHong Zhang #undef __FUNCT__
126525d23c0SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqAIJ"
127525d23c0SHong Zhang PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
128525d23c0SHong Zhang {
129525d23c0SHong Zhang   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
130525d23c0SHong Zhang   PetscErrorCode ierr;
1310944192fSHong Zhang   PetscInt       k,l,row,col,N;
132525d23c0SHong Zhang   PetscScalar    dx,*y,*xx,*w3_array;
133525d23c0SHong Zhang   PetscScalar    *vscale_array;
134525d23c0SHong Zhang   PetscReal      epsilon=coloring->error_rel,umin = coloring->umin,unorm;
135525d23c0SHong Zhang   Vec            w1=coloring->w1,w2=coloring->w2,w3;
136525d23c0SHong Zhang   void           *fctx=coloring->fctx;
1370944192fSHong Zhang   PetscBool      flg=PETSC_FALSE;
138525d23c0SHong Zhang   const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
1399a09c712SHong Zhang   PetscInt       **columns=coloring->columns,ncolumns_k,nrows_k,nz;
1409a09c712SHong Zhang   MatEntry       *Jentry=coloring->matentry;
141525d23c0SHong Zhang 
142525d23c0SHong Zhang   PetscFunctionBegin;
143525d23c0SHong Zhang   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
144525d23c0SHong Zhang   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
145525d23c0SHong Zhang   if (flg) {
146525d23c0SHong Zhang     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
147525d23c0SHong Zhang   } else {
148525d23c0SHong Zhang     PetscBool assembled;
149525d23c0SHong Zhang     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
150525d23c0SHong Zhang     if (assembled) {
151525d23c0SHong Zhang       ierr = MatZeroEntries(J);CHKERRQ(ierr);
152525d23c0SHong Zhang     }
153525d23c0SHong Zhang   }
154525d23c0SHong Zhang   if (!coloring->vscale) {
155525d23c0SHong Zhang     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
156525d23c0SHong Zhang   }
157525d23c0SHong Zhang 
158525d23c0SHong Zhang   /* Set w1 = F(x1) */
159525d23c0SHong Zhang   if (!coloring->fset) {
160525d23c0SHong Zhang     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
161525d23c0SHong Zhang     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
162525d23c0SHong Zhang     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
163525d23c0SHong Zhang   } else {
164525d23c0SHong Zhang     coloring->fset = PETSC_FALSE;
165525d23c0SHong Zhang   }
166525d23c0SHong Zhang 
167525d23c0SHong Zhang   if (!coloring->w3) {
168525d23c0SHong Zhang     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
169525d23c0SHong Zhang     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
170525d23c0SHong Zhang   }
171525d23c0SHong Zhang   w3 = coloring->w3;
172525d23c0SHong Zhang 
1730944192fSHong Zhang   /* Compute scale factors: vscale = dx */
1740944192fSHong Zhang   if (coloring->htype[0] == 'w') {
1750944192fSHong Zhang     /* tacky test; need to make systematic if we add other approaches to computing h*/
1760944192fSHong Zhang     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
1770944192fSHong Zhang     dx = (1.0 + unorm)*epsilon;
1780944192fSHong Zhang     ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr);
1790944192fSHong Zhang   } else {
180525d23c0SHong Zhang     ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
181525d23c0SHong Zhang     ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
182525d23c0SHong Zhang     ierr = VecGetSize(x1,&N);CHKERRQ(ierr);
183525d23c0SHong Zhang     for (col=0; col<N; col++) {
184525d23c0SHong Zhang       dx = xx[col];
1850944192fSHong Zhang       if (dx == 0.0) dx = 1.0;
186525d23c0SHong Zhang       if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
187525d23c0SHong Zhang       else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
188525d23c0SHong Zhang       dx               *= epsilon;
1890944192fSHong Zhang       vscale_array[col] = dx;
190525d23c0SHong Zhang     }
1910944192fSHong Zhang     ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
1920944192fSHong Zhang     ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
1930944192fSHong Zhang   }
194525d23c0SHong Zhang 
195525d23c0SHong Zhang   nz  = 0;
1960944192fSHong Zhang   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
197525d23c0SHong Zhang   for (k=0; k<ncolors; k++) { /* loop over colors */
198525d23c0SHong Zhang     coloring->currentcolor = k;
199525d23c0SHong Zhang 
200525d23c0SHong Zhang     /*
201525d23c0SHong Zhang       Loop over each column associated with color
202525d23c0SHong Zhang       adding the perturbation to the vector w3 = x1 + dx.
203525d23c0SHong Zhang     */
204525d23c0SHong Zhang     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
205525d23c0SHong Zhang     ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
206525d23c0SHong Zhang     ncolumns_k = ncolumns[k];
207525d23c0SHong Zhang     for (l=0; l<ncolumns_k; l++) { /* loop over columns */
208525d23c0SHong Zhang       col = columns[k][l];
209525d23c0SHong Zhang       w3_array[col] += vscale_array[col];
210525d23c0SHong Zhang     }
211525d23c0SHong Zhang     ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
212525d23c0SHong Zhang 
213525d23c0SHong Zhang     /*
214525d23c0SHong Zhang       Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
215525d23c0SHong Zhang                            w2 = F(x1 + dx) - F(x1)
216525d23c0SHong Zhang     */
217525d23c0SHong Zhang     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
218525d23c0SHong Zhang     ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
219525d23c0SHong Zhang     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
220525d23c0SHong Zhang     ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
221525d23c0SHong Zhang 
222525d23c0SHong Zhang     /*
223525d23c0SHong Zhang       Loop over rows of vector, putting w2/dx into Jacobian matrix
224525d23c0SHong Zhang     */
225525d23c0SHong Zhang     ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
226525d23c0SHong Zhang     nrows_k = nrows[k];
2279a09c712SHong Zhang     nrows_k = nrows[k];
228525d23c0SHong Zhang     for (l=0; l<nrows_k; l++) { /* loop over rows */
2299a09c712SHong Zhang       row                     = Jentry[nz].row;
2309a09c712SHong Zhang       col                     = Jentry[nz].col;
2319a09c712SHong Zhang       *(Jentry[nz++].valaddr) = y[row]/vscale_array[col];
232525d23c0SHong Zhang     }
233525d23c0SHong Zhang     ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
234525d23c0SHong Zhang   } /* endof for each color */
235525d23c0SHong Zhang   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
236525d23c0SHong Zhang 
237525d23c0SHong Zhang   coloring->currentcolor = -1;
238525d23c0SHong Zhang   PetscFunctionReturn(0);
239525d23c0SHong Zhang }
240639f9d9dSBarry Smith 
24179c1e64dSHong Zhang #undef __FUNCT__
242*4b2e90caSHong Zhang #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_new"
24379c1e64dSHong Zhang /*
24479c1e64dSHong Zhang     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
24579c1e64dSHong Zhang */
246*4b2e90caSHong Zhang PetscErrorCode MatFDColoringCreate_SeqAIJ_new(Mat mat,ISColoring iscoloring,MatFDColoring c)
24779c1e64dSHong Zhang {
24879c1e64dSHong Zhang   PetscErrorCode ierr;
24979c1e64dSHong Zhang   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
250*4b2e90caSHong Zhang   const PetscInt *is,*row,*ci,*cj;
251*4b2e90caSHong Zhang   PetscInt       nis=iscoloring->n,*rowhit,bs,bs2,*spidx,nz;
25279c1e64dSHong Zhang   IS             *isa;
253525d23c0SHong Zhang   PetscBool      isBAIJ;
254*4b2e90caSHong Zhang   Mat_SeqAIJ     *spA = (Mat_SeqAIJ*)mat->data;
255*4b2e90caSHong Zhang   PetscScalar    *A_val=spA->a;
2569a09c712SHong Zhang   PetscScalar    **valaddrhit;
2579a09c712SHong Zhang   MatEntry       *Jentry;
25879c1e64dSHong Zhang 
25979c1e64dSHong Zhang   PetscFunctionBegin;
26079c1e64dSHong Zhang   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
26152011a10SHong Zhang 
262476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
263476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
26479c1e64dSHong Zhang   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
265525d23c0SHong Zhang   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
266525d23c0SHong Zhang   if (!isBAIJ) {
26752011a10SHong Zhang     bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */
26879c1e64dSHong Zhang   }
26979c1e64dSHong Zhang   N         = mat->cmap->N/bs;
27079c1e64dSHong Zhang   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
27179c1e64dSHong Zhang   c->N      = mat->cmap->N/bs;
27279c1e64dSHong Zhang   c->m      = mat->rmap->N/bs;
27379c1e64dSHong Zhang   c->rstart = 0;
27479c1e64dSHong Zhang 
27579c1e64dSHong Zhang   c->ncolors = nis;
27679c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
27779c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
27879c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
279*4b2e90caSHong Zhang   ierr       = PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt));CHKERRQ(ierr);
2809a09c712SHong Zhang 
281*4b2e90caSHong Zhang   ierr       = PetscMalloc(spA->nz*sizeof(MatEntry),&Jentry);CHKERRQ(ierr);
282*4b2e90caSHong Zhang   ierr       = PetscLogObjectMemory((PetscObject)c,spA->nz*sizeof(MatEntry));CHKERRQ(ierr);
2839a09c712SHong Zhang   c->matentry = Jentry;
28479c1e64dSHong Zhang 
285525d23c0SHong Zhang   if (isBAIJ) {
286525d23c0SHong Zhang     ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
287525d23c0SHong Zhang   } else {
2886378f32dSHong Zhang     ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
289525d23c0SHong Zhang   }
29079c1e64dSHong Zhang 
291*4b2e90caSHong Zhang   ierr = PetscMalloc2(c->m,PetscInt,&rowhit,c->m,PetscScalar*,&valaddrhit);CHKERRQ(ierr);
29279c1e64dSHong Zhang   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
29379c1e64dSHong Zhang 
29417a7dee1SHong Zhang   nz = 0;
29579c1e64dSHong Zhang   for (i=0; i<nis; i++) { /* loop over colors */
29679c1e64dSHong Zhang     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
29779c1e64dSHong Zhang     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
29879c1e64dSHong Zhang 
29979c1e64dSHong Zhang     c->ncolumns[i] = n;
30079c1e64dSHong Zhang     if (n) {
30179c1e64dSHong Zhang       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
302*4b2e90caSHong Zhang       ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr);
30379c1e64dSHong Zhang       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
30479c1e64dSHong Zhang     } else {
30579c1e64dSHong Zhang       c->columns[i] = 0;
30679c1e64dSHong Zhang     }
30779c1e64dSHong Zhang 
30879c1e64dSHong Zhang     /* fast, crude version requires O(N*N) work */
3099a09c712SHong Zhang     bs2   = bs*bs;
310476e0d0aSHong Zhang     nrows = 0;
31179c1e64dSHong Zhang     for (j=0; j<n; j++) {  /* loop over columns */
31279c1e64dSHong Zhang       col    = is[j];
313*4b2e90caSHong Zhang       row    = cj + ci[col];
31479c1e64dSHong Zhang       m      = ci[col+1] - ci[col];
315*4b2e90caSHong Zhang       nrows += m;
31679c1e64dSHong Zhang       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
317*4b2e90caSHong Zhang         rowhit[*row]       = col + 1;
318*4b2e90caSHong Zhang         valaddrhit[*row++] = &A_val[bs2*spidx[ci[col] + k]];
31979c1e64dSHong Zhang       }
32079c1e64dSHong Zhang     }
321476e0d0aSHong Zhang     c->nrows[i] = nrows; /* total num of rows for this color */
32279c1e64dSHong Zhang 
32379c1e64dSHong Zhang     for (j=0; j<N; j++) { /* loop over rows */
32479c1e64dSHong Zhang       if (rowhit[j]) {
3259a09c712SHong Zhang         Jentry[nz].row     = j;              /* local row index */
3269a09c712SHong Zhang         Jentry[nz].col     = rowhit[j] - 1;  /* local column index */
3279a09c712SHong Zhang         Jentry[nz].valaddr = valaddrhit[j];  /* address of mat value for this entry */
3289a09c712SHong Zhang         nz++;
32979c1e64dSHong Zhang         rowhit[j] = 0.0;                     /* zero rowhit for reuse */
33079c1e64dSHong Zhang       }
33179c1e64dSHong Zhang     }
33279c1e64dSHong Zhang     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
33379c1e64dSHong Zhang   }
334*4b2e90caSHong Zhang   if (nz != spA->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz,spA->nz);
33585740eacSHong Zhang 
336525d23c0SHong Zhang   if (isBAIJ) {
337525d23c0SHong Zhang     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
338525d23c0SHong Zhang   } else {
3396378f32dSHong Zhang     ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
340525d23c0SHong Zhang   }
341*4b2e90caSHong Zhang   ierr = PetscFree2(rowhit,valaddrhit);CHKERRQ(ierr);
34279c1e64dSHong Zhang   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
343476e0d0aSHong Zhang 
3444737c552SHong Zhang   c->ctype                  = IS_COLORING_GHOSTED;
345b582cc96SHong Zhang   if (isBAIJ) {
346b582cc96SHong Zhang     mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ;
347b7799381SHong Zhang     ierr = PetscMalloc(bs*mat->cmap->N*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr);
348b582cc96SHong Zhang   } else {
34979c1e64dSHong Zhang     mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
350b582cc96SHong Zhang   }
35152011a10SHong Zhang   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
35279c1e64dSHong Zhang   PetscFunctionReturn(0);
35379c1e64dSHong Zhang }
35479c1e64dSHong Zhang 
3554a2ae208SSatish Balay #undef __FUNCT__
3564a2ae208SSatish Balay #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
3573acb8795SBarry Smith /*
3583acb8795SBarry Smith     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
3593acb8795SBarry Smith     This is why it has the ugly code with the MatGetBlockSize()
3603acb8795SBarry Smith */
361dfbe8321SBarry Smith PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
362639f9d9dSBarry Smith {
3636849ba73SBarry Smith   PetscErrorCode ierr;
3641a83f524SJed Brown   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
3651a83f524SJed Brown   const PetscInt *is,*rows,*ci,*cj;
3663acb8795SBarry Smith   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
367b9617806SBarry Smith   IS             *isa;
368ace3abfcSBarry Smith   PetscBool      done,flg = PETSC_FALSE;
369476e0d0aSHong Zhang   PetscBool      flg1;
37079c1e64dSHong Zhang   PetscBool      new_impl=PETSC_FALSE;
371639f9d9dSBarry Smith 
3723a40ed3dSBarry Smith   PetscFunctionBegin;
37379c1e64dSHong Zhang   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
37479c1e64dSHong Zhang   if (new_impl){
375*4b2e90caSHong Zhang     ierr =  MatFDColoringCreate_SeqAIJ_new(mat,iscoloring,c);CHKERRQ(ierr);
37679c1e64dSHong Zhang     PetscFunctionReturn(0);
37779c1e64dSHong Zhang   }
378e7e72b3dSBarry Smith   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
379522c5e43SBarry Smith 
380b9617806SBarry Smith   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
381476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
382476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
383251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
384476e0d0aSHong Zhang   if (flg1) {
3853acb8795SBarry Smith     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3863acb8795SBarry Smith   }
3873acb8795SBarry Smith 
3883acb8795SBarry Smith   N         = mat->cmap->N/bs;
3893acb8795SBarry Smith   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
3903acb8795SBarry Smith   c->N      = mat->cmap->N/bs;
3913acb8795SBarry Smith   c->m      = mat->rmap->N/bs;
392005c665bSBarry Smith   c->rstart = 0;
393005c665bSBarry Smith 
394639f9d9dSBarry Smith   c->ncolors = nis;
39538baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
39638baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
39738baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
39838baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
39938baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
40043a90d84SBarry Smith 
4013acb8795SBarry Smith   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
402ce94432eSBarry Smith   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
40370c3da92SBarry Smith 
40470c3da92SBarry Smith   /*
405639f9d9dSBarry Smith      Temporary option to allow for debugging/testing
40670c3da92SBarry Smith   */
4070298fd71SBarry Smith   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
40870c3da92SBarry Smith 
40938baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
41038baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
41170c3da92SBarry Smith 
412639f9d9dSBarry Smith   for (i=0; i<nis; i++) {
413b9b97703SBarry Smith     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
414639f9d9dSBarry Smith     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
4152205254eSKarl Rupp 
416639f9d9dSBarry Smith     c->ncolumns[i] = n;
417639f9d9dSBarry Smith     if (n) {
41838baddfdSBarry Smith       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
41938baddfdSBarry Smith       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
42070c3da92SBarry Smith     } else {
421639f9d9dSBarry Smith       c->columns[i] = 0;
42270c3da92SBarry Smith     }
42370c3da92SBarry Smith 
4243a7fca6bSBarry Smith     if (!flg) { /* ------------------------------------------------------------------------------*/
4253a7fca6bSBarry Smith       /* fast, crude version requires O(N*N) work */
42638baddfdSBarry Smith       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
427639f9d9dSBarry Smith       /* loop over columns*/
428639f9d9dSBarry Smith       for (j=0; j<n; j++) {
429639f9d9dSBarry Smith         col  = is[j];
430639f9d9dSBarry Smith         rows = cj + ci[col];
431639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
432639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
433639f9d9dSBarry Smith         for (k=0; k<m; k++) {
434639f9d9dSBarry Smith           rowhit[*rows++] = col + 1;
43570c3da92SBarry Smith         }
43670c3da92SBarry Smith       }
437639f9d9dSBarry Smith       /* count the number of hits */
438639f9d9dSBarry Smith       nrows = 0;
43970c3da92SBarry Smith       for (j=0; j<N; j++) {
440639f9d9dSBarry Smith         if (rowhit[j]) nrows++;
441639f9d9dSBarry Smith       }
442639f9d9dSBarry Smith       c->nrows[i] = nrows;
44338baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
44438baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
445639f9d9dSBarry Smith       nrows       = 0;
446639f9d9dSBarry Smith       for (j=0; j<N; j++) {
447639f9d9dSBarry Smith         if (rowhit[j]) {
448639f9d9dSBarry Smith           c->rows[i][nrows]          = j;
449639f9d9dSBarry Smith           c->columnsforrow[i][nrows] = rowhit[j] - 1;
450639f9d9dSBarry Smith           nrows++;
45170c3da92SBarry Smith         }
45270c3da92SBarry Smith       }
453639f9d9dSBarry Smith     } else {  /*-------------------------------------------------------------------------------*/
4543a7fca6bSBarry Smith       /* slow version, using rowhit as a linked list */
45538baddfdSBarry Smith       PetscInt currentcol,fm,mfm;
456639f9d9dSBarry Smith       rowhit[N] = N;
457639f9d9dSBarry Smith       nrows     = 0;
458639f9d9dSBarry Smith       /* loop over columns */
459639f9d9dSBarry Smith       for (j=0; j<n; j++) {
460639f9d9dSBarry Smith         col  = is[j];
461639f9d9dSBarry Smith         rows = cj + ci[col];
462639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
463639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
464639f9d9dSBarry Smith         fm = N;    /* fm points to first entry in linked list */
465639f9d9dSBarry Smith         for (k=0; k<m; k++) {
466639f9d9dSBarry Smith           currentcol = *rows++;
467639f9d9dSBarry Smith           /* is it already in the list? */
468639f9d9dSBarry Smith           do {
469639f9d9dSBarry Smith             mfm = fm;
470639f9d9dSBarry Smith             fm  = rowhit[fm];
471639f9d9dSBarry Smith           } while (fm < currentcol);
472639f9d9dSBarry Smith           /* not in list so add it */
473639f9d9dSBarry Smith           if (fm != currentcol) {
474639f9d9dSBarry Smith             nrows++;
475639f9d9dSBarry Smith             columnsforrow[currentcol] = col;
476639f9d9dSBarry Smith             /* next three lines insert new entry into linked list */
477639f9d9dSBarry Smith             rowhit[mfm]        = currentcol;
478639f9d9dSBarry Smith             rowhit[currentcol] = fm;
479639f9d9dSBarry Smith             fm                 = currentcol;
480639f9d9dSBarry Smith             /* fm points to present position in list since we know the columns are sorted */
48171cd77b2SBarry Smith           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
482639f9d9dSBarry Smith         }
483639f9d9dSBarry Smith       }
484639f9d9dSBarry Smith       c->nrows[i] = nrows;
48538baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
48638baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
487639f9d9dSBarry Smith       /* now store the linked list of rows into c->rows[i] */
488639f9d9dSBarry Smith       nrows = 0;
489639f9d9dSBarry Smith       fm    = rowhit[N];
490639f9d9dSBarry Smith       do {
491639f9d9dSBarry Smith         c->rows[i][nrows]            = fm;
492639f9d9dSBarry Smith         c->columnsforrow[i][nrows++] = columnsforrow[fm];
493639f9d9dSBarry Smith         fm                           = rowhit[fm];
494639f9d9dSBarry Smith       } while (fm < N);
495639f9d9dSBarry Smith     } /* ---------------------------------------------------------------------------------------*/
496639f9d9dSBarry Smith     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
497639f9d9dSBarry Smith   }
4983acb8795SBarry Smith   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
499639f9d9dSBarry Smith 
500606d414cSSatish Balay   ierr = PetscFree(rowhit);CHKERRQ(ierr);
501606d414cSSatish Balay   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
502639f9d9dSBarry Smith 
50330b34957SBarry Smith   /* Optimize by adding the vscale, and scaleforrow[][] fields */
50430b34957SBarry Smith   /*
50530b34957SBarry Smith        see the version for MPIAIJ
50630b34957SBarry Smith   */
507ce94432eSBarry Smith   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
50838baddfdSBarry Smith   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
50930b34957SBarry Smith   for (k=0; k<c->ncolors; k++) {
51038baddfdSBarry Smith     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
51130b34957SBarry Smith     for (l=0; l<c->nrows[k]; l++) {
51230b34957SBarry Smith       col = c->columnsforrow[k][l];
5132205254eSKarl Rupp 
51430b34957SBarry Smith       c->vscaleforrow[k][l] = col;
51530b34957SBarry Smith     }
51630b34957SBarry Smith   }
517b9617806SBarry Smith   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
5183a40ed3dSBarry Smith   PetscFunctionReturn(0);
51970c3da92SBarry Smith }
52079c1e64dSHong Zhang 
521