xref: /petsc/src/mat/impls/aij/mpi/fdmpiaij.c (revision 9c3909c5e405273cef8e775cc435fd661474488d)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3 
4 extern PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat);
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatFDColoringApply_MPIAIJ"
8 PetscErrorCode  MatFDColoringApply_MPIAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
9 {
10   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
11   PetscErrorCode ierr;
12   PetscInt       k,start,end,l,row,col,**vscaleforrow;
13   PetscScalar    dx,*y,*xx,*w3_array;
14   PetscScalar    *vscale_array;
15   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
16   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
17   void           *fctx   = coloring->fctx;
18   PetscBool      flg     = PETSC_FALSE;
19   PetscInt       ctype   = coloring->ctype,N,col_start=0,col_end=0;
20   Vec            x1_tmp;
21   PetscInt       nz;
22   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)J->data;
23   Mat            A=aij->A,B=aij->B;
24   Mat_SeqAIJ     *cspA=(Mat_SeqAIJ*)A->data, *cspB=(Mat_SeqAIJ*)B->data;
25 
26   PetscFunctionBegin;
27   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
28   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
29   if (flg) {
30     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
31   } else {
32     PetscBool assembled;
33     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
34     if (assembled) {
35       ierr = MatZeroEntries(J);CHKERRQ(ierr);
36     }
37   }
38 
39   x1_tmp = x1;
40   if (!coloring->vscale) {
41     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
42   }
43 
44   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
45     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
46   }
47   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
48 
49   /* Set w1 = F(x1) */
50   if (!coloring->fset) {
51     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
52     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
53     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
54   } else {
55     coloring->fset = PETSC_FALSE;
56   }
57 
58   if (!coloring->w3) {
59     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
60     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
61   }
62   w3 = coloring->w3;
63 
64   /* Compute all the local scale factors, including ghost points */
65   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
66   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
67   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
68   if (ctype == IS_COLORING_GHOSTED) {
69     col_start = 0; col_end = N;
70   } else if (ctype == IS_COLORING_GLOBAL) {
71     xx           = xx - start;
72     vscale_array = vscale_array - start;
73     col_start    = start; col_end = N + start;
74   }
75   for (col=col_start; col<col_end; col++) {
76     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
77     if (coloring->htype[0] == 'w') {
78       dx = 1.0 + unorm;
79     } else {
80       dx = xx[col];
81     }
82     if (dx == (PetscScalar)0.0) dx = 1.0;
83     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
84     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
85     dx               *= epsilon;
86     vscale_array[col] = (PetscScalar)1.0/dx;
87   }
88   if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start;
89   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
90   if (ctype == IS_COLORING_GLOBAL) {
91     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
92     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
93   }
94 
95   if (coloring->vscaleforrow) {
96     vscaleforrow = coloring->vscaleforrow;
97   } else SETERRQ(PetscObjectComm((PetscObject)J),PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
98 
99   /*
100     Loop over each color
101   */
102   nz = 0;
103   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
104   for (k=0; k<coloring->ncolors; k++) {
105     coloring->currentcolor = k;
106 
107     ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
108     ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
109     if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
110     /*
111       Loop over each column associated with color
112       adding the perturbation to the vector w3.
113     */
114     for (l=0; l<coloring->ncolumns[k]; l++) {
115       col = coloring->columns[k][l];    /* local column of the matrix we are probing for */
116       if (coloring->htype[0] == 'w') {
117         dx = 1.0 + unorm;
118       } else {
119         dx = xx[col];
120       }
121       if (dx == (PetscScalar)0.0) dx = 1.0;
122       if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
123       else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
124       dx            *= epsilon;
125       if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
126       w3_array[col] += dx;
127     }
128     if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
129     ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
130 
131     /*
132       Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
133                            w2 = F(x1 + dx) - F(x1)
134     */
135     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
136     ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
137     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
138     ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
139 
140     /*
141       Loop over rows of vector, putting results into Jacobian matrix
142     */
143     ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
144     for (l=0; l<coloring->nrows[k]; l++) {
145       row     = coloring->rows[k][l];            /* local row index */
146       *(coloring->spaddr[nz++]) = vscale_array[vscaleforrow[k][l]]*y[row];
147     }
148     ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
149   } /* endof for each color */
150   if (nz != cspA->nz + cspB->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz,cspA->nz+cspB->nz);
151   ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
152   ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
153 
154   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
155   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
156   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
157 
158   coloring->currentcolor = -1;
159   PetscFunctionReturn(0);
160 }
161 
162 #undef __FUNCT__
163 #define __FUNCT__ "MatFDColoringCreate_MPIAIJ_new"
164 PetscErrorCode MatFDColoringCreate_MPIAIJ_new(Mat mat,ISColoring iscoloring,MatFDColoring c)
165 {
166   Mat_MPIAIJ             *aij = (Mat_MPIAIJ*)mat->data;
167   PetscErrorCode         ierr;
168   PetscMPIInt            size,*ncolsonproc,*disp,nn;
169   PetscInt               i,n,nrows,j,k,m,ncols,col;
170   const PetscInt         *is,*A_ci,*A_cj,*B_ci,*B_cj,*rows = 0,*ltog;
171   PetscInt               nis = iscoloring->n,nctot,*cols;
172   PetscInt               *rowhit,M,cstart,cend,colb;
173   PetscInt               *columnsforrow,l;
174   IS                     *isa;
175   ISLocalToGlobalMapping map   = mat->cmap->mapping;
176   PetscInt               ctype=c->ctype;
177   Mat                    A=aij->A,B=aij->B;
178   Mat_SeqAIJ             *cspA=(Mat_SeqAIJ*)A->data, *cspB=(Mat_SeqAIJ*)B->data;
179   PetscScalar            *A_val=cspA->a,*B_val=cspB->a;
180   PetscInt               spidx;
181 
182   PetscFunctionBegin;
183   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping");
184 
185   if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr);}
186   else     ltog = NULL;
187   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
188 
189   M         = mat->rmap->n;
190   cstart    = mat->cmap->rstart;
191   cend      = mat->cmap->rend;
192   c->M      = mat->rmap->N;         /* set the global rows and columns and local rows */
193   c->N      = mat->cmap->N;
194   c->m      = mat->rmap->n;
195   c->rstart = mat->rmap->rstart;
196 
197   c->ncolors = nis;
198   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
199   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
200   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
201   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
202   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
203   ierr       = PetscLogObjectMemory((PetscObject)c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
204 
205   /* Allow access to data structures of local part of matrix */
206   if (!aij->colmap) {
207     ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
208   }
209   PetscInt    *spidxA,*spidxB,nz;
210   PetscScalar **spaddrhit;
211   ierr = MatGetColumnIJ_SeqAIJ_Color(aij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr);
212   ierr = MatGetColumnIJ_SeqAIJ_Color(aij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr);
213 
214   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
215   ierr = PetscMalloc((M+1)*sizeof(PetscScalar*),&spaddrhit);CHKERRQ(ierr);
216   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
217   nz = cspA->nz + cspB->nz; /* total nonzero entries of mat */
218   ierr = PetscMalloc(nz*sizeof(PetscScalar*),&c->spaddr);CHKERRQ(ierr);
219 
220   nz = 0;
221   for (i=0; i<nis; i++) {
222     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
223     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
224 
225     c->ncolumns[i] = n; /* local number of columns of this color on this process */
226     if (n) {
227       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
228       ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr);
229       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
230     } else {
231       c->columns[i] = 0;
232     }
233 
234     if (ctype == IS_COLORING_GLOBAL) {
235       /* Determine the total (parallel) number of columns of this color */
236       ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
237       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
238 
239       ierr  = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
240       ierr  = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
241       nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
242       if (!nctot) {
243         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
244       }
245 
246       disp[0] = 0;
247       for (j=1; j<size; j++) {
248         disp[j] = disp[j-1] + ncolsonproc[j-1];
249       }
250 
251       /* Get complete list of columns for color on each processor */
252       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
253       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
254       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
255     } else if (ctype == IS_COLORING_GHOSTED) {
256       /* Determine local number of columns of this color on this process, including ghost points */
257       nctot = n;
258       ierr  = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
259       ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
260     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
261 
262     /*
263        Mark all rows affect by these columns
264     */
265    /*-----------------------------------------------------------------------------*/
266     /* crude, fast version */
267     ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
268     /* loop over columns*/
269     for (j=0; j<nctot; j++) {
270       if (ctype == IS_COLORING_GHOSTED) {
271         col = ltog[cols[j]];
272       } else {
273         col = cols[j];
274       }
275       if (col >= cstart && col < cend) { /* column is in diagonal block of matrix A */
276         rows = A_cj + A_ci[col-cstart];
277         m    = A_ci[col-cstart+1] - A_ci[col-cstart];
278         /* loop over columns of A marking them in rowhit */
279         for (k=0; k<m; k++) {
280           /* set spaddrhit for part A */
281           spidx            = spidxA[A_ci[col-cstart] + k];
282           spaddrhit[*rows] = &A_val[spidx];
283           rowhit[*rows++]  = col + 1;
284         }
285       } else { /* column is in off-diagonal block of matrix B */
286 #if defined(PETSC_USE_CTABLE)
287         ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr);
288         colb--;
289 #else
290         colb = aij->colmap[col] - 1; /* local column index */
291 #endif
292         if (colb == -1) {
293           m = 0;
294         } else {
295           rows = B_cj + B_ci[colb];
296           m    = B_ci[colb+1] - B_ci[colb];
297         }
298         /* loop over columns of B marking them in rowhit */
299         for (k=0; k<m; k++) {
300           /* set spaddrhit for part B */
301           spidx            = spidxB[B_ci[colb] + k];
302           spaddrhit[*rows] = &B_val[spidx];
303           rowhit[*rows++] = col + 1;
304         }
305       }
306     }
307 
308     /* count the number of hits */
309     nrows = 0;
310     for (j=0; j<M; j++) {
311       if (rowhit[j]) nrows++;
312     }
313     c->nrows[i] = nrows;
314     ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
315     ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
316     ierr        = PetscLogObjectMemory((PetscObject)c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
317     nrows       = 0;
318     for (j=0; j<M; j++) {
319       if (rowhit[j]) {
320         c->rows[i][nrows]          = j;              /* local row index */
321         c->columnsforrow[i][nrows] = rowhit[j] - 1;  /* global column index */
322         c->spaddr[nz++]            = spaddrhit[j];   /* address of mat value for this entry */
323         nrows++;
324       }
325     }
326     ierr = PetscFree(cols);CHKERRQ(ierr);
327   }
328   if (nz != cspA->nz + cspB->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz,cspA->nz+cspB->nz);
329 
330   /* Optimize by adding the vscale, and scaleforrow[][] fields */
331   /*
332        vscale will contain the "diagonal" on processor scalings followed by the off processor
333   */
334   if (ctype == IS_COLORING_GLOBAL) {
335     ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),aij->A->rmap->n,PETSC_DETERMINE,aij->B->cmap->n,aij->garray,&c->vscale);CHKERRQ(ierr);
336     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
337     for (k=0; k<c->ncolors; k++) {
338       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
339       for (l=0; l<c->nrows[k]; l++) {
340         col = c->columnsforrow[k][l];
341         if (col >= cstart && col < cend) {
342           /* column is in diagonal block of matrix */
343           colb = col - cstart;
344         } else {
345           /* column  is in "off-processor" part */
346 #if defined(PETSC_USE_CTABLE)
347           ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr);
348           colb--;
349 #else
350           colb = aij->colmap[col] - 1;
351 #endif
352           colb += cend - cstart;
353         }
354         c->vscaleforrow[k][l] = colb;
355       }
356     }
357   } else if (ctype == IS_COLORING_GHOSTED) {
358     /* Get gtol mapping */
359     PetscInt N = mat->cmap->N,nlocal,*gtol;
360     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
361     for (i=0; i<N; i++) gtol[i] = -1;
362     ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr);
363     for (i=0; i<nlocal; i++) gtol[ltog[i]] = i;
364 
365     c->vscale = 0; /* will be created in MatFDColoringApply() */
366     ierr      = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
367     for (k=0; k<c->ncolors; k++) {
368       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
369       for (l=0; l<c->nrows[k]; l++) {
370         col = c->columnsforrow[k][l];      /* global column index */
371 
372         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
373       }
374     }
375     ierr = PetscFree(gtol);CHKERRQ(ierr);
376   }
377   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
378 
379   ierr = PetscFree(rowhit);CHKERRQ(ierr);
380   ierr = PetscFree(spaddrhit);CHKERRQ(ierr);
381   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
382   ierr = MatRestoreColumnIJ_SeqAIJ_Color(aij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr);
383   ierr = MatRestoreColumnIJ_SeqAIJ_Color(aij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr);
384   if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,&ltog);CHKERRQ(ierr);}
385 
386   mat->ops->fdcoloringapply = MatFDColoringApply_MPIAIJ;
387   PetscFunctionReturn(0);
388 }
389 
390 /*------------------------------------------------------*/
391 #undef __FUNCT__
392 #define __FUNCT__ "MatFDColoringCreate_MPIAIJ"
393 PetscErrorCode MatFDColoringCreate_MPIAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
394 {
395   Mat_MPIAIJ             *aij = (Mat_MPIAIJ*)mat->data;
396   PetscErrorCode         ierr;
397   PetscMPIInt            size,*ncolsonproc,*disp,nn;
398   PetscInt               i,n,nrows,j,k,m,ncols,col;
399   const PetscInt         *is,*A_ci,*A_cj,*B_ci,*B_cj,*rows = 0,*ltog;
400   PetscInt               nis = iscoloring->n,nctot,*cols;
401   PetscInt               *rowhit,M,cstart,cend,colb;
402   PetscInt               *columnsforrow,l;
403   IS                     *isa;
404   PetscBool              done,flg;
405   ISLocalToGlobalMapping map   = mat->cmap->mapping;
406   PetscInt               ctype=c->ctype;
407   PetscBool              new_impl=PETSC_FALSE;
408 
409   PetscFunctionBegin;
410   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
411   if (new_impl){
412     ierr =  MatFDColoringCreate_MPIAIJ_new(mat,iscoloring,c);CHKERRQ(ierr);
413     PetscFunctionReturn(0);
414   }
415   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping");
416 
417   if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr);}
418   else     ltog = NULL;
419   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
420 
421   M         = mat->rmap->n;
422   cstart    = mat->cmap->rstart;
423   cend      = mat->cmap->rend;
424   c->M      = mat->rmap->N;         /* set the global rows and columns and local rows */
425   c->N      = mat->cmap->N;
426   c->m      = mat->rmap->n;
427   c->rstart = mat->rmap->rstart;
428 
429   c->ncolors = nis;
430   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
431   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
432   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
433   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
434   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
435   ierr       = PetscLogObjectMemory((PetscObject)c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
436 
437   /* Allow access to data structures of local part of matrix */
438   if (!aij->colmap) {
439     ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
440   }
441   ierr = MatGetColumnIJ(aij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
442   ierr = MatGetColumnIJ(aij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
443 
444   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
445   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
446 
447   for (i=0; i<nis; i++) {
448     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
449     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
450 
451     c->ncolumns[i] = n; /* local number of columns of this color on this process */
452     if (n) {
453       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
454       ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr);
455       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
456     } else {
457       c->columns[i] = 0;
458     }
459 
460     if (ctype == IS_COLORING_GLOBAL) {
461       /* Determine the total (parallel) number of columns of this color */
462       ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
463       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
464 
465       ierr  = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
466       ierr  = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
467       nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
468       if (!nctot) {
469         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
470       }
471 
472       disp[0] = 0;
473       for (j=1; j<size; j++) {
474         disp[j] = disp[j-1] + ncolsonproc[j-1];
475       }
476 
477       /* Get complete list of columns for color on each processor */
478       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
479       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
480       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
481     } else if (ctype == IS_COLORING_GHOSTED) {
482       /* Determine local number of columns of this color on this process, including ghost points */
483       nctot = n;
484       ierr  = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
485       ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
486     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
487 
488     /*
489        Mark all rows affect by these columns
490     */
491     /* Temporary option to allow for debugging/testing */
492     flg  = PETSC_FALSE;
493     ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
494     if (!flg) { /*-----------------------------------------------------------------------------*/
495       /* crude, fast version */
496       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
497       /* loop over columns*/
498       for (j=0; j<nctot; j++) {
499         if (ctype == IS_COLORING_GHOSTED) {
500           col = ltog[cols[j]];
501         } else {
502           col = cols[j];
503         }
504         if (col >= cstart && col < cend) {
505           /* column is in diagonal block of matrix */
506           rows = A_cj + A_ci[col-cstart];
507           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
508         } else {
509 #if defined(PETSC_USE_CTABLE)
510           ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr);
511           colb--;
512 #else
513           colb = aij->colmap[col] - 1;
514 #endif
515           if (colb == -1) {
516             m = 0;
517           } else {
518             rows = B_cj + B_ci[colb];
519             m    = B_ci[colb+1] - B_ci[colb];
520           }
521         }
522         /* loop over columns marking them in rowhit */
523         for (k=0; k<m; k++) {
524           rowhit[*rows++] = col + 1;
525         }
526       }
527 
528       /* count the number of hits */
529       nrows = 0;
530       for (j=0; j<M; j++) {
531         if (rowhit[j]) nrows++;
532       }
533       c->nrows[i] = nrows;
534       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
535       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
536       ierr        = PetscLogObjectMemory((PetscObject)c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
537       nrows       = 0;
538       for (j=0; j<M; j++) {
539         if (rowhit[j]) {
540           c->rows[i][nrows]          = j;              /* local row index */
541           c->columnsforrow[i][nrows] = rowhit[j] - 1;  /* global column index */
542           nrows++;
543         }
544       }
545     } else { /*-------------------------------------------------------------------------------*/
546       /* slow version, using rowhit as a linked list */
547       PetscInt currentcol,fm,mfm;
548       rowhit[M] = M;
549       nrows     = 0;
550       /* loop over columns*/
551       for (j=0; j<nctot; j++) {
552         if (ctype == IS_COLORING_GHOSTED) {
553           col = ltog[cols[j]];
554         } else {
555           col = cols[j];
556         }
557         if (col >= cstart && col < cend) {
558           /* column is in diagonal block of matrix */
559           rows = A_cj + A_ci[col-cstart];
560           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
561         } else {
562 #if defined(PETSC_USE_CTABLE)
563           ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr);
564           colb--;
565 #else
566           colb = aij->colmap[col] - 1;
567 #endif
568           if (colb == -1) {
569             m = 0;
570           } else {
571             rows = B_cj + B_ci[colb];
572             m    = B_ci[colb+1] - B_ci[colb];
573           }
574         }
575 
576         /* loop over columns marking them in rowhit */
577         fm = M;    /* fm points to first entry in linked list */
578         for (k=0; k<m; k++) {
579           currentcol = *rows++;
580           /* is it already in the list? */
581           do {
582             mfm = fm;
583             fm  = rowhit[fm];
584           } while (fm < currentcol);
585           /* not in list so add it */
586           if (fm != currentcol) {
587             nrows++;
588             columnsforrow[currentcol] = col;
589             /* next three lines insert new entry into linked list */
590             rowhit[mfm]        = currentcol;
591             rowhit[currentcol] = fm;
592             fm                 = currentcol;
593             /* fm points to present position in list since we know the columns are sorted */
594           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
595         }
596       }
597       c->nrows[i] = nrows;
598 
599       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
600       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
601       ierr = PetscLogObjectMemory((PetscObject)c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
602       /* now store the linked list of rows into c->rows[i] */
603       nrows = 0;
604       fm    = rowhit[M];
605       do {
606         c->rows[i][nrows]            = fm;
607         c->columnsforrow[i][nrows++] = columnsforrow[fm];
608         fm                           = rowhit[fm];
609       } while (fm < M);
610     } /* ---------------------------------------------------------------------------------------*/
611     ierr = PetscFree(cols);CHKERRQ(ierr);
612   }
613 
614   /* Optimize by adding the vscale, and scaleforrow[][] fields */
615   /*
616        vscale will contain the "diagonal" on processor scalings followed by the off processor
617   */
618   if (ctype == IS_COLORING_GLOBAL) {
619     ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),aij->A->rmap->n,PETSC_DETERMINE,aij->B->cmap->n,aij->garray,&c->vscale);CHKERRQ(ierr);
620     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
621     for (k=0; k<c->ncolors; k++) {
622       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
623       for (l=0; l<c->nrows[k]; l++) {
624         col = c->columnsforrow[k][l];
625         if (col >= cstart && col < cend) {
626           /* column is in diagonal block of matrix */
627           colb = col - cstart;
628         } else {
629           /* column  is in "off-processor" part */
630 #if defined(PETSC_USE_CTABLE)
631           ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr);
632           colb--;
633 #else
634           colb = aij->colmap[col] - 1;
635 #endif
636           colb += cend - cstart;
637         }
638         c->vscaleforrow[k][l] = colb;
639       }
640     }
641   } else if (ctype == IS_COLORING_GHOSTED) {
642     /* Get gtol mapping */
643     PetscInt N = mat->cmap->N,nlocal,*gtol;
644     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
645     for (i=0; i<N; i++) gtol[i] = -1;
646     ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr);
647     for (i=0; i<nlocal; i++) gtol[ltog[i]] = i;
648 
649     c->vscale = 0; /* will be created in MatFDColoringApply() */
650     ierr      = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
651     for (k=0; k<c->ncolors; k++) {
652       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
653       for (l=0; l<c->nrows[k]; l++) {
654         col = c->columnsforrow[k][l];      /* global column index */
655 
656         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
657       }
658     }
659     ierr = PetscFree(gtol);CHKERRQ(ierr);
660   }
661   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
662 
663   ierr = PetscFree(rowhit);CHKERRQ(ierr);
664   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
665   ierr = MatRestoreColumnIJ(aij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
666   ierr = MatRestoreColumnIJ(aij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
667   if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,&ltog);CHKERRQ(ierr);}
668   PetscFunctionReturn(0);
669 }
670 
671 
672 
673 
674 
675 
676