xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision 525d23c034271bd270c20f595b9bb6ddbc2c1c84)
170c3da92SBarry Smith 
2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
3*525d23c0SHong Zhang #include <../src/mat/impls/baij/seq/baij.h>
4*525d23c0SHong Zhang 
5*525d23c0SHong Zhang /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */
6*525d23c0SHong Zhang /* #define JACOBIANCOLOROPT */
7*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
8*525d23c0SHong Zhang #include <petsctime.h>
9*525d23c0SHong Zhang #endif
10*525d23c0SHong Zhang #undef __FUNCT__
11*525d23c0SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqAIJ"
12*525d23c0SHong Zhang PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
13*525d23c0SHong Zhang {
14*525d23c0SHong Zhang   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
15*525d23c0SHong Zhang   PetscErrorCode ierr;
16*525d23c0SHong Zhang   PetscInt       k,l,row,col,N;
17*525d23c0SHong Zhang   PetscScalar    dx,*y,*xx,*w3_array;
18*525d23c0SHong Zhang   PetscScalar    *vscale_array;
19*525d23c0SHong Zhang   PetscReal      epsilon=coloring->error_rel,umin = coloring->umin,unorm;
20*525d23c0SHong Zhang   Vec            w1=coloring->w1,w2=coloring->w2,w3;
21*525d23c0SHong Zhang   void           *fctx=coloring->fctx;
22*525d23c0SHong Zhang   PetscBool      flg=PETSC_FALSE;
23*525d23c0SHong Zhang   Mat_SeqAIJ     *csp=(Mat_SeqAIJ*)J->data;
24*525d23c0SHong Zhang   PetscScalar    *ca=csp->a;
25*525d23c0SHong Zhang   const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
26*525d23c0SHong Zhang   PetscInt       **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx;
27*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
28*525d23c0SHong Zhang   PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11;
29*525d23c0SHong Zhang #endif
30*525d23c0SHong Zhang 
31*525d23c0SHong Zhang   PetscFunctionBegin;
32*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
33*525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
34*525d23c0SHong Zhang #endif
35*525d23c0SHong Zhang   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
36*525d23c0SHong Zhang   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
37*525d23c0SHong Zhang   if (flg) {
38*525d23c0SHong Zhang     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
39*525d23c0SHong Zhang   } else {
40*525d23c0SHong Zhang     PetscBool assembled;
41*525d23c0SHong Zhang     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
42*525d23c0SHong Zhang     if (assembled) {
43*525d23c0SHong Zhang       ierr = MatZeroEntries(J);CHKERRQ(ierr);
44*525d23c0SHong Zhang     }
45*525d23c0SHong Zhang   }
46*525d23c0SHong Zhang 
47*525d23c0SHong Zhang   if (!coloring->vscale) {
48*525d23c0SHong Zhang     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
49*525d23c0SHong Zhang   }
50*525d23c0SHong Zhang 
51*525d23c0SHong Zhang   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
52*525d23c0SHong Zhang     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
53*525d23c0SHong Zhang   }
54*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
55*525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
56*525d23c0SHong Zhang     t_init += t1 - t0;
57*525d23c0SHong Zhang #endif
58*525d23c0SHong Zhang 
59*525d23c0SHong Zhang   /* Set w1 = F(x1) */
60*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
61*525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
62*525d23c0SHong Zhang #endif
63*525d23c0SHong Zhang   if (!coloring->fset) {
64*525d23c0SHong Zhang     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
65*525d23c0SHong Zhang     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
66*525d23c0SHong Zhang     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
67*525d23c0SHong Zhang   } else {
68*525d23c0SHong Zhang     coloring->fset = PETSC_FALSE;
69*525d23c0SHong Zhang   }
70*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
71*525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
72*525d23c0SHong Zhang     t_setvals += t1 - t0;
73*525d23c0SHong Zhang #endif
74*525d23c0SHong Zhang 
75*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
76*525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
77*525d23c0SHong Zhang #endif
78*525d23c0SHong Zhang   if (!coloring->w3) {
79*525d23c0SHong Zhang     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
80*525d23c0SHong Zhang     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
81*525d23c0SHong Zhang   }
82*525d23c0SHong Zhang   w3 = coloring->w3;
83*525d23c0SHong Zhang 
84*525d23c0SHong Zhang   /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */
85*525d23c0SHong Zhang   ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
86*525d23c0SHong Zhang   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
87*525d23c0SHong Zhang   ierr = VecGetSize(x1,&N);CHKERRQ(ierr);
88*525d23c0SHong Zhang   for (col=0; col<N; col++) {
89*525d23c0SHong Zhang     if (coloring->htype[0] == 'w') {
90*525d23c0SHong Zhang       dx = 1.0 + unorm;
91*525d23c0SHong Zhang     } else {
92*525d23c0SHong Zhang       dx = xx[col];
93*525d23c0SHong Zhang     }
94*525d23c0SHong Zhang     if (dx == (PetscScalar)0.0) dx = 1.0;
95*525d23c0SHong Zhang     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
96*525d23c0SHong Zhang     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
97*525d23c0SHong Zhang     dx               *= epsilon;
98*525d23c0SHong Zhang     vscale_array[col] = (PetscScalar)dx;
99*525d23c0SHong Zhang   }
100*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
101*525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
102*525d23c0SHong Zhang     t_init += t1 - t0;
103*525d23c0SHong Zhang #endif
104*525d23c0SHong Zhang 
105*525d23c0SHong Zhang   nz  = 0;
106*525d23c0SHong Zhang   for (k=0; k<ncolors; k++) { /* loop over colors */
107*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
108*525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
109*525d23c0SHong Zhang #endif
110*525d23c0SHong Zhang     coloring->currentcolor = k;
111*525d23c0SHong Zhang 
112*525d23c0SHong Zhang     /*
113*525d23c0SHong Zhang       Loop over each column associated with color
114*525d23c0SHong Zhang       adding the perturbation to the vector w3 = x1 + dx.
115*525d23c0SHong Zhang     */
116*525d23c0SHong Zhang     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
117*525d23c0SHong Zhang     ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
118*525d23c0SHong Zhang     ncolumns_k = ncolumns[k];
119*525d23c0SHong Zhang     for (l=0; l<ncolumns_k; l++) { /* loop over columns */
120*525d23c0SHong Zhang       col = columns[k][l];
121*525d23c0SHong Zhang       w3_array[col] += vscale_array[col];
122*525d23c0SHong Zhang     }
123*525d23c0SHong Zhang     ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
124*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
125*525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
126*525d23c0SHong Zhang     t_dx += t1 - t0;
127*525d23c0SHong Zhang #endif
128*525d23c0SHong Zhang 
129*525d23c0SHong Zhang     /*
130*525d23c0SHong Zhang       Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
131*525d23c0SHong Zhang                            w2 = F(x1 + dx) - F(x1)
132*525d23c0SHong Zhang     */
133*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
134*525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
135*525d23c0SHong Zhang #endif
136*525d23c0SHong Zhang     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
137*525d23c0SHong Zhang     ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
138*525d23c0SHong Zhang     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
139*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
140*525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
141*525d23c0SHong Zhang     t_func += t1 - t0;
142*525d23c0SHong Zhang #endif
143*525d23c0SHong Zhang 
144*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
145*525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
146*525d23c0SHong Zhang #endif
147*525d23c0SHong Zhang     ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
148*525d23c0SHong Zhang 
149*525d23c0SHong Zhang     /*
150*525d23c0SHong Zhang       Loop over rows of vector, putting w2/dx into Jacobian matrix
151*525d23c0SHong Zhang     */
152*525d23c0SHong Zhang     ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
153*525d23c0SHong Zhang     nrows_k = nrows[k];
154*525d23c0SHong Zhang     for (l=0; l<nrows_k; l++) { /* loop over rows */
155*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
156*525d23c0SHong Zhang     ierr = PetscTime(&t00);CHKERRQ(ierr);
157*525d23c0SHong Zhang #endif
158*525d23c0SHong Zhang       row   = coloring->rowcolden2sp3[nz++];
159*525d23c0SHong Zhang       col   = coloring->rowcolden2sp3[nz++];
160*525d23c0SHong Zhang       spidx = coloring->rowcolden2sp3[nz++];
161*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
162*525d23c0SHong Zhang       ierr = PetscTime(&t11);CHKERRQ(ierr);
163*525d23c0SHong Zhang       t_kl += t11 - t00;
164*525d23c0SHong Zhang #endif
165*525d23c0SHong Zhang       ca[spidx] = y[row]/vscale_array[col];
166*525d23c0SHong Zhang     }
167*525d23c0SHong Zhang     ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
168*525d23c0SHong Zhang 
169*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
170*525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
171*525d23c0SHong Zhang     t_setvals += t1 - t0;
172*525d23c0SHong Zhang #endif
173*525d23c0SHong Zhang   } /* endof for each color */
174*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
175*525d23c0SHong Zhang   printf("     FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx);
176*525d23c0SHong Zhang   printf("     FDColorApply time: kl %g\n",t_kl);
177*525d23c0SHong Zhang #endif
178*525d23c0SHong Zhang   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
179*525d23c0SHong Zhang   ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
180*525d23c0SHong Zhang 
181*525d23c0SHong Zhang   coloring->currentcolor = -1;
182*525d23c0SHong Zhang   PetscFunctionReturn(0);
183*525d23c0SHong Zhang }
184639f9d9dSBarry Smith 
18579c1e64dSHong Zhang #undef __FUNCT__
18679c1e64dSHong Zhang #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp"
18779c1e64dSHong Zhang /*
18879c1e64dSHong Zhang     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
18979c1e64dSHong Zhang */
19079c1e64dSHong Zhang PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c)
19179c1e64dSHong Zhang {
19279c1e64dSHong Zhang   PetscErrorCode ierr;
19379c1e64dSHong Zhang   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
19479c1e64dSHong Zhang   const PetscInt *is,*rows,*ci,*cj;
19552011a10SHong Zhang   PetscInt       nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz;
19679c1e64dSHong Zhang   IS             *isa;
197*525d23c0SHong Zhang   PetscBool      isBAIJ;
19879c1e64dSHong Zhang   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
19979c1e64dSHong Zhang 
20079c1e64dSHong Zhang   PetscFunctionBegin;
20179c1e64dSHong Zhang   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
20252011a10SHong Zhang 
203476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
204476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
20579c1e64dSHong Zhang   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
206*525d23c0SHong Zhang   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
207*525d23c0SHong Zhang   if (!isBAIJ) {
20852011a10SHong Zhang     bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */
20979c1e64dSHong Zhang   }
21079c1e64dSHong Zhang   N         = mat->cmap->N/bs;
21179c1e64dSHong Zhang   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
21279c1e64dSHong Zhang   c->N      = mat->cmap->N/bs;
21379c1e64dSHong Zhang   c->m      = mat->rmap->N/bs;
21479c1e64dSHong Zhang   c->rstart = 0;
21579c1e64dSHong Zhang 
21679c1e64dSHong Zhang   c->ncolors = nis;
21779c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
21879c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
21979c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
22085740eacSHong Zhang   ierr       = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr);
22179c1e64dSHong Zhang 
222*525d23c0SHong Zhang   if (isBAIJ) {
223*525d23c0SHong Zhang     ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
224*525d23c0SHong Zhang   } else {
2256378f32dSHong Zhang     ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
226*525d23c0SHong Zhang   }
22779c1e64dSHong Zhang 
228476e0d0aSHong Zhang   ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr);
22979c1e64dSHong Zhang   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
23079c1e64dSHong Zhang 
23117a7dee1SHong Zhang   nz = 0;
23279c1e64dSHong Zhang   for (i=0; i<nis; i++) { /* loop over colors */
23379c1e64dSHong Zhang     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
23479c1e64dSHong Zhang     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
23579c1e64dSHong Zhang 
23679c1e64dSHong Zhang     c->ncolumns[i] = n;
23779c1e64dSHong Zhang     if (n) {
23879c1e64dSHong Zhang       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
23979c1e64dSHong Zhang       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
24079c1e64dSHong Zhang     } else {
24179c1e64dSHong Zhang       c->columns[i] = 0;
24279c1e64dSHong Zhang     }
24379c1e64dSHong Zhang 
24479c1e64dSHong Zhang     /* fast, crude version requires O(N*N) work */
245476e0d0aSHong Zhang     nrows = 0;
24679c1e64dSHong Zhang     for (j=0; j<n; j++) {  /* loop over columns */
24779c1e64dSHong Zhang       col  = is[j];
24879c1e64dSHong Zhang       rows = cj + ci[col];
24979c1e64dSHong Zhang       m    = ci[col+1] - ci[col];
25079c1e64dSHong Zhang       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
25179c1e64dSHong Zhang         rowhit[*rows]     = col + 1;
252476e0d0aSHong Zhang         spidxhit[*rows++] = spidx[ci[col] + k];
253476e0d0aSHong Zhang         nrows++;
25479c1e64dSHong Zhang       }
25579c1e64dSHong Zhang     }
256476e0d0aSHong Zhang     c->nrows[i] = nrows; /* total num of rows for this color */
25779c1e64dSHong Zhang 
25879c1e64dSHong Zhang     nrows = 0;
25979c1e64dSHong Zhang     for (j=0; j<N; j++) { /* loop over rows */
26079c1e64dSHong Zhang       if (rowhit[j]) {
26117a7dee1SHong Zhang         c->rowcolden2sp3[nz++]   = j;             /* row index */
26217a7dee1SHong Zhang         c->rowcolden2sp3[nz++] = rowhit[j] - 1;   /* column index */
26317a7dee1SHong Zhang         c->rowcolden2sp3[nz++] = spidxhit[j];     /* den2sp index */
26479c1e64dSHong Zhang         rowhit[j] = 0.0;                          /* zero rowhit for reuse */
26579c1e64dSHong Zhang       }
26679c1e64dSHong Zhang     }
26779c1e64dSHong Zhang     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
26879c1e64dSHong Zhang   }
26917a7dee1SHong Zhang   if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz);
27085740eacSHong Zhang 
271*525d23c0SHong Zhang   if (isBAIJ) {
272*525d23c0SHong Zhang     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
273*525d23c0SHong Zhang   } else {
2746378f32dSHong Zhang     ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
275*525d23c0SHong Zhang   }
276476e0d0aSHong Zhang   ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr);
27779c1e64dSHong Zhang   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
278476e0d0aSHong Zhang 
2794737c552SHong Zhang   c->ctype                  = IS_COLORING_GHOSTED;
28079c1e64dSHong Zhang   mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
28152011a10SHong Zhang   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
28279c1e64dSHong Zhang   PetscFunctionReturn(0);
28379c1e64dSHong Zhang }
28479c1e64dSHong Zhang 
2854a2ae208SSatish Balay #undef __FUNCT__
2864a2ae208SSatish Balay #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
2873acb8795SBarry Smith /*
2883acb8795SBarry Smith     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
2893acb8795SBarry Smith     This is why it has the ugly code with the MatGetBlockSize()
2903acb8795SBarry Smith */
291dfbe8321SBarry Smith PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
292639f9d9dSBarry Smith {
2936849ba73SBarry Smith   PetscErrorCode ierr;
2941a83f524SJed Brown   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
2951a83f524SJed Brown   const PetscInt *is,*rows,*ci,*cj;
2963acb8795SBarry Smith   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
297b9617806SBarry Smith   IS             *isa;
298ace3abfcSBarry Smith   PetscBool      done,flg = PETSC_FALSE;
299476e0d0aSHong Zhang   PetscBool      flg1;
30079c1e64dSHong Zhang   PetscBool      new_impl=PETSC_FALSE;
301639f9d9dSBarry Smith 
3023a40ed3dSBarry Smith   PetscFunctionBegin;
30379c1e64dSHong Zhang   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
30479c1e64dSHong Zhang   if (new_impl){
30579c1e64dSHong Zhang     ierr =  MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr);
30679c1e64dSHong Zhang     PetscFunctionReturn(0);
30779c1e64dSHong Zhang   }
308e7e72b3dSBarry Smith   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
309522c5e43SBarry Smith 
310b9617806SBarry Smith   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
311476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
312476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
313251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
314476e0d0aSHong Zhang   if (flg1) {
3153acb8795SBarry Smith     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3163acb8795SBarry Smith   }
3173acb8795SBarry Smith 
3183acb8795SBarry Smith   N         = mat->cmap->N/bs;
3193acb8795SBarry Smith   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
3203acb8795SBarry Smith   c->N      = mat->cmap->N/bs;
3213acb8795SBarry Smith   c->m      = mat->rmap->N/bs;
322005c665bSBarry Smith   c->rstart = 0;
323005c665bSBarry Smith 
324639f9d9dSBarry Smith   c->ncolors = nis;
32538baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
32638baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
32738baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
32838baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
32938baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
33043a90d84SBarry Smith 
3313acb8795SBarry Smith   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
332ce94432eSBarry Smith   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
33370c3da92SBarry Smith 
33470c3da92SBarry Smith   /*
335639f9d9dSBarry Smith      Temporary option to allow for debugging/testing
33670c3da92SBarry Smith   */
3370298fd71SBarry Smith   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
33870c3da92SBarry Smith 
33938baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
34038baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
34170c3da92SBarry Smith 
342639f9d9dSBarry Smith   for (i=0; i<nis; i++) {
343b9b97703SBarry Smith     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
344639f9d9dSBarry Smith     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
3452205254eSKarl Rupp 
346639f9d9dSBarry Smith     c->ncolumns[i] = n;
347639f9d9dSBarry Smith     if (n) {
34838baddfdSBarry Smith       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
34938baddfdSBarry Smith       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
35070c3da92SBarry Smith     } else {
351639f9d9dSBarry Smith       c->columns[i] = 0;
35270c3da92SBarry Smith     }
35370c3da92SBarry Smith 
3543a7fca6bSBarry Smith     if (!flg) { /* ------------------------------------------------------------------------------*/
3553a7fca6bSBarry Smith       /* fast, crude version requires O(N*N) work */
35638baddfdSBarry Smith       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
357639f9d9dSBarry Smith       /* loop over columns*/
358639f9d9dSBarry Smith       for (j=0; j<n; j++) {
359639f9d9dSBarry Smith         col  = is[j];
360639f9d9dSBarry Smith         rows = cj + ci[col];
361639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
362639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
363639f9d9dSBarry Smith         for (k=0; k<m; k++) {
364639f9d9dSBarry Smith           rowhit[*rows++] = col + 1;
36570c3da92SBarry Smith         }
36670c3da92SBarry Smith       }
367639f9d9dSBarry Smith       /* count the number of hits */
368639f9d9dSBarry Smith       nrows = 0;
36970c3da92SBarry Smith       for (j=0; j<N; j++) {
370639f9d9dSBarry Smith         if (rowhit[j]) nrows++;
371639f9d9dSBarry Smith       }
372639f9d9dSBarry Smith       c->nrows[i] = nrows;
37338baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
37438baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
375639f9d9dSBarry Smith       nrows       = 0;
376639f9d9dSBarry Smith       for (j=0; j<N; j++) {
377639f9d9dSBarry Smith         if (rowhit[j]) {
378639f9d9dSBarry Smith           c->rows[i][nrows]          = j;
379639f9d9dSBarry Smith           c->columnsforrow[i][nrows] = rowhit[j] - 1;
380639f9d9dSBarry Smith           nrows++;
38170c3da92SBarry Smith         }
38270c3da92SBarry Smith       }
383639f9d9dSBarry Smith     } else {  /*-------------------------------------------------------------------------------*/
3843a7fca6bSBarry Smith       /* slow version, using rowhit as a linked list */
38538baddfdSBarry Smith       PetscInt currentcol,fm,mfm;
386639f9d9dSBarry Smith       rowhit[N] = N;
387639f9d9dSBarry Smith       nrows     = 0;
388639f9d9dSBarry Smith       /* loop over columns */
389639f9d9dSBarry Smith       for (j=0; j<n; j++) {
390639f9d9dSBarry Smith         col  = is[j];
391639f9d9dSBarry Smith         rows = cj + ci[col];
392639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
393639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
394639f9d9dSBarry Smith         fm = N;    /* fm points to first entry in linked list */
395639f9d9dSBarry Smith         for (k=0; k<m; k++) {
396639f9d9dSBarry Smith           currentcol = *rows++;
397639f9d9dSBarry Smith           /* is it already in the list? */
398639f9d9dSBarry Smith           do {
399639f9d9dSBarry Smith             mfm = fm;
400639f9d9dSBarry Smith             fm  = rowhit[fm];
401639f9d9dSBarry Smith           } while (fm < currentcol);
402639f9d9dSBarry Smith           /* not in list so add it */
403639f9d9dSBarry Smith           if (fm != currentcol) {
404639f9d9dSBarry Smith             nrows++;
405639f9d9dSBarry Smith             columnsforrow[currentcol] = col;
406639f9d9dSBarry Smith             /* next three lines insert new entry into linked list */
407639f9d9dSBarry Smith             rowhit[mfm]        = currentcol;
408639f9d9dSBarry Smith             rowhit[currentcol] = fm;
409639f9d9dSBarry Smith             fm                 = currentcol;
410639f9d9dSBarry Smith             /* fm points to present position in list since we know the columns are sorted */
41171cd77b2SBarry Smith           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
412639f9d9dSBarry Smith         }
413639f9d9dSBarry Smith       }
414639f9d9dSBarry Smith       c->nrows[i] = nrows;
41538baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
41638baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
417639f9d9dSBarry Smith       /* now store the linked list of rows into c->rows[i] */
418639f9d9dSBarry Smith       nrows = 0;
419639f9d9dSBarry Smith       fm    = rowhit[N];
420639f9d9dSBarry Smith       do {
421639f9d9dSBarry Smith         c->rows[i][nrows]            = fm;
422639f9d9dSBarry Smith         c->columnsforrow[i][nrows++] = columnsforrow[fm];
423639f9d9dSBarry Smith         fm                           = rowhit[fm];
424639f9d9dSBarry Smith       } while (fm < N);
425639f9d9dSBarry Smith     } /* ---------------------------------------------------------------------------------------*/
426639f9d9dSBarry Smith     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
427639f9d9dSBarry Smith   }
4283acb8795SBarry Smith   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
429639f9d9dSBarry Smith 
430606d414cSSatish Balay   ierr = PetscFree(rowhit);CHKERRQ(ierr);
431606d414cSSatish Balay   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
432639f9d9dSBarry Smith 
43330b34957SBarry Smith   /* Optimize by adding the vscale, and scaleforrow[][] fields */
43430b34957SBarry Smith   /*
43530b34957SBarry Smith        see the version for MPIAIJ
43630b34957SBarry Smith   */
437ce94432eSBarry Smith   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
43838baddfdSBarry Smith   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
43930b34957SBarry Smith   for (k=0; k<c->ncolors; k++) {
44038baddfdSBarry Smith     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
44130b34957SBarry Smith     for (l=0; l<c->nrows[k]; l++) {
44230b34957SBarry Smith       col = c->columnsforrow[k][l];
4432205254eSKarl Rupp 
44430b34957SBarry Smith       c->vscaleforrow[k][l] = col;
44530b34957SBarry Smith     }
44630b34957SBarry Smith   }
447b9617806SBarry Smith   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
4483a40ed3dSBarry Smith   PetscFunctionReturn(0);
44970c3da92SBarry Smith }
45079c1e64dSHong Zhang 
451