xref: /petsc/src/mat/impls/aij/mpi/fdmpiaij.c (revision f8c2866e654d46b30197bc999c108affa4012b0e)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3 #include <../src/mat/impls/baij/mpi/mpibaij.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatFDColoringApply_BAIJ_new"
7 PetscErrorCode  MatFDColoringApply_BAIJ_new(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
8 {
9   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
10   PetscErrorCode ierr;
11   PetscInt       k,cstart,cend,l,row,col,nz,spidx,i,j;
12   PetscScalar    dx=0.0,*y,*xx,*w3_array,*dy_i,*dy=coloring->dy;
13   PetscScalar    *vscale_array;
14   PetscReal      epsilon=coloring->error_rel,umin=coloring->umin,unorm;
15   Vec            w1=coloring->w1,w2=coloring->w2,w3,vscale=coloring->vscale;
16   void           *fctx=coloring->fctx;
17   PetscBool      flg=PETSC_FALSE;
18   PetscInt       ctype=coloring->ctype,nxloc,nrows_k;
19   Mat_MPIBAIJ    *aij=(Mat_MPIBAIJ*)J->data;
20   PetscScalar    *valaddr;
21   MatEntry       *Jentry=coloring->matentry;
22   const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
23   PetscInt       bs=J->rmap->bs;
24 
25   PetscFunctionBegin;
26   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
27   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
28   if (flg) {
29     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
30   } else {
31     PetscBool assembled;
32     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
33     if (assembled) {
34       ierr = MatZeroEntries(J);CHKERRQ(ierr);
35     }
36   }
37 
38   /* create vscale for storing dx */
39   if (!vscale) {
40     if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') {
41       ierr = VecCreateGhost(PetscObjectComm((PetscObject)J),J->cmap->n,PETSC_DETERMINE,aij->B->cmap->n,aij->garray,&vscale);CHKERRQ(ierr);
42 
43     } else if (ctype == IS_COLORING_GHOSTED) {
44       ierr = VecDuplicate(x1,&vscale);CHKERRQ(ierr);
45     }
46     coloring->vscale = vscale;
47   }
48 
49   /* (1) Set w1 = F(x1) */
50   if (!coloring->fset) {
51     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
52     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
53     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
54   } else {
55     coloring->fset = PETSC_FALSE;
56   }
57 
58   /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */
59   ierr = VecGetLocalSize(x1,&nxloc);CHKERRQ(ierr);
60   //PetscMPIInt rank;
61   //ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
62   //printf("[%d] nxloc %d\n",rank,nxloc);
63   if (coloring->htype[0] == 'w') {
64     /* vscale = dx is a constant scalar */
65     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
66     dx = 1.0/(PetscSqrtReal(1.0 + unorm)*epsilon);
67   } else {
68     ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
69     ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr);
70     for (col=0; col<nxloc; col++) {
71       dx = xx[col];
72       if (PetscAbsScalar(dx) < umin) {
73         if (PetscRealPart(dx) >= 0.0)      dx = umin;
74         else if (PetscRealPart(dx) < 0.0 ) dx = -umin;
75       }
76       dx               *= epsilon;
77       vscale_array[col] = 1.0/dx;
78     }
79     ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
80     ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr);
81   }
82   if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] != 'w') {
83     ierr = VecGhostUpdateBegin(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
84     ierr = VecGhostUpdateEnd(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
85   }
86 
87   /* (3) Loop over each color */
88   if (!coloring->w3) {
89     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
90     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
91   }
92   w3 = coloring->w3;
93 
94   ierr = VecGetOwnershipRange(x1,&cstart,&cend);CHKERRQ(ierr); /* used by ghosted vscale */
95   if (vscale) {
96     ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr);
97   }
98   nz   = 0;
99   for (k=0; k<ncolors; k++) {
100     coloring->currentcolor = k;
101 
102     /*
103       (3-1) Loop over each column associated with color
104       adding the perturbation to the vector w3 = x1 + dx.
105     */
106     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
107     dy_i = dy;
108     for (i=0; i<bs; i++) {     /* Loop over a block of columns */
109       //------------------------------------------------
110       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
111       if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
112       if (coloring->htype[0] == 'w') {
113         for (l=0; l<ncolumns[k]; l++) {
114           col            = i + bs*coloring->columns[k][l];  /* local column (in global index!) of the matrix we are probing for */
115           w3_array[col] += 1.0/dx;
116 
117           if (i) {
118             w3_array[col-1] -= 1.0/dx; /* resume original w3[col-1] */
119           }
120 
121         }
122       } else { /* htype == 'ds' */
123         vscale_array -= cstart; /* shift pointer so global index can be used */
124         for (l=0; l<ncolumns[k]; l++) {
125           col = i + bs*coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
126           w3_array[col] += 1.0/vscale_array[col];
127 
128           if (i) {
129             w3_array[col-1] -=  1.0/vscale_array[col-1]; /* resume original w3[col-1] */
130           }
131 
132         }
133         vscale_array += cstart;
134       }
135       if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
136       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
137 
138       /*
139        (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
140                            w2 = F(x1 + dx) - F(x1)
141        */
142       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
143       ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */
144       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
145       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
146       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
147       //---------------------------------------------
148       ierr = VecResetArray(w2);CHKERRQ(ierr);
149       dy_i += nxloc; /* points to dy+i*nxloc */
150     }
151 
152     /*
153      (3-3) Loop over rows of vector, putting results into Jacobian matrix
154     */
155     nrows_k = nrows[k];
156     ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
157     if (coloring->htype[0] == 'w') {
158       for (l=0; l<nrows_k; l++) {
159         row     = bs*Jentry[nz].row;   /* local row index */
160         valaddr = Jentry[nz].valaddr;
161         nz++;
162         spidx = 0;
163         dy_i  = dy;
164         for (i=0; i<bs; i++) {   /* column of the block */
165           for (j=0; j<bs; j++) { /* row of the block */
166             valaddr[spidx++] = dy_i[row+j]*dx;
167           }
168           dy_i += nxloc; /* points to dy+i*nxloc */
169         }
170       }
171     } else { /* htype == 'ds' */
172       for (l=0; l<nrows_k; l++) {
173         row     = bs*Jentry[nz].row;   /* local row index */
174         col     = bs*Jentry[nz].col;   /* local column index */
175         valaddr = Jentry[nz].valaddr;
176         nz++;
177         spidx = 0;
178         dy_i  = dy;
179         for (i=0; i<bs; i++) {   /* column of the block */
180           for (j=0; j<bs; j++) { /* row of the block */
181             valaddr[spidx++] = dy_i[row+j]*vscale_array[col+i];
182           }
183           dy_i += nxloc; /* points to dy+i*nxloc */
184         }
185       }
186     }
187     ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
188   }
189   ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
190   ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
191   if (vscale) {
192     ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr);
193   }
194 
195   coloring->currentcolor = -1;
196   PetscFunctionReturn(0);
197 }
198 
199 extern PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat);
200 #undef __FUNCT__
201 #define __FUNCT__ "MatFDColoringApply_AIJ_new"
202 PetscErrorCode  MatFDColoringApply_AIJ_new(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
203 {
204   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
205   PetscErrorCode ierr;
206   PetscInt       k,cstart,cend,l,row,col,nz;
207   PetscScalar    dx=0.0,*y,*xx,*w3_array;
208   PetscScalar    *vscale_array;
209   PetscReal      epsilon=coloring->error_rel,umin=coloring->umin,unorm;
210   Vec            w1=coloring->w1,w2=coloring->w2,w3,vscale=coloring->vscale;
211   void           *fctx=coloring->fctx;
212   PetscBool      flg=PETSC_FALSE;
213   PetscInt       ctype=coloring->ctype,nxloc,nrows_k;
214   Mat_MPIAIJ     *aij=(Mat_MPIAIJ*)J->data;
215   MatEntry       *Jentry=coloring->matentry;
216   const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
217 
218   PetscFunctionBegin;
219   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
220   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
221   if (flg) {
222     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
223   } else {
224     PetscBool assembled;
225     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
226     if (assembled) {
227       ierr = MatZeroEntries(J);CHKERRQ(ierr);
228     }
229   }
230 
231   /* create vscale for storing dx */
232   if (!vscale) {
233     if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') {
234       ierr = VecCreateGhost(PetscObjectComm((PetscObject)J),J->cmap->n,PETSC_DETERMINE,aij->B->cmap->n,aij->garray,&vscale);CHKERRQ(ierr);
235     } else if (ctype == IS_COLORING_GHOSTED) {
236       ierr = VecDuplicate(x1,&vscale);CHKERRQ(ierr);
237     }
238     coloring->vscale = vscale;
239   }
240 
241   /* (1) Set w1 = F(x1) */
242   if (!coloring->fset) {
243     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
244     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
245     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
246   } else {
247     coloring->fset = PETSC_FALSE;
248   }
249 
250   /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */
251   if (coloring->htype[0] == 'w') {
252     /* vscale = dx is a constant scalar */
253     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
254     dx = 1.0/(PetscSqrtReal(1.0 + unorm)*epsilon);
255   } else {
256     ierr = VecGetLocalSize(x1,&nxloc);CHKERRQ(ierr);
257     ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
258     ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr);
259     for (col=0; col<nxloc; col++) {
260       dx = xx[col];
261       if (PetscAbsScalar(dx) < umin) {
262         if (PetscRealPart(dx) >= 0.0)      dx = umin;
263         else if (PetscRealPart(dx) < 0.0 ) dx = -umin;
264       }
265       dx               *= epsilon;
266       vscale_array[col] = 1.0/dx;
267     }
268     ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
269     ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr);
270   }
271   if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] != 'w') {
272     ierr = VecGhostUpdateBegin(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
273     ierr = VecGhostUpdateEnd(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
274   }
275 
276   /* (3) Loop over each color */
277   if (!coloring->w3) {
278     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
279     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
280   }
281   w3 = coloring->w3;
282 
283   ierr = VecGetOwnershipRange(x1,&cstart,&cend);CHKERRQ(ierr); /* used by ghosted vscale */
284   if (vscale) {
285     ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr);
286   }
287   nz   = 0;
288   for (k=0; k<ncolors; k++) {
289     coloring->currentcolor = k;
290 
291     /*
292       (3-1) Loop over each column associated with color
293       adding the perturbation to the vector w3 = x1 + dx.
294     */
295     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
296     ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
297     if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
298     if (coloring->htype[0] == 'w') {
299       for (l=0; l<ncolumns[k]; l++) {
300         col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
301         w3_array[col] += 1.0/dx;
302       }
303     } else { /* htype == 'ds' */
304       vscale_array -= cstart; /* shift pointer so global index can be used */
305       for (l=0; l<ncolumns[k]; l++) {
306         col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
307         w3_array[col] += 1.0/vscale_array[col];
308       }
309       vscale_array += cstart;
310     }
311     if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
312     ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
313 
314     /*
315       (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
316                            w2 = F(x1 + dx) - F(x1)
317     */
318     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
319     ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
320     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
321     ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
322 
323     /*
324      (3-3) Loop over rows of vector, putting results into Jacobian matrix
325     */
326     nrows_k = nrows[k];
327     ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
328     if (coloring->htype[0] == 'w') {
329       for (l=0; l<nrows_k; l++) {
330         row                     = Jentry[nz].row;   /* local row index */
331         *(Jentry[nz++].valaddr) = y[row]*dx;
332       }
333     } else { /* htype == 'ds' */
334       for (l=0; l<nrows_k; l++) {
335         row                   = Jentry[nz].row;   /* local row index */
336         *(Jentry[nz].valaddr) = y[row]*vscale_array[Jentry[nz].col];
337         nz++;
338       }
339     }
340     ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
341   }
342   ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   if (vscale) {
345     ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr);
346   }
347 
348   coloring->currentcolor = -1;
349   PetscFunctionReturn(0);
350 }
351 
352 #undef __FUNCT__
353 #define __FUNCT__ "MatFDColoringCreate_MPIAIJ_new"
354 PetscErrorCode MatFDColoringCreate_MPIAIJ_new(Mat mat,ISColoring iscoloring,MatFDColoring c)
355 {
356   //Mat_MPIAIJ             *aij=(Mat_MPIAIJ*)mat->data;
357   PetscErrorCode         ierr;
358   PetscMPIInt            size,*ncolsonproc,*disp,nn;
359   PetscInt               i,n,nrows,nrows_i,j,k,m,ncols,col;
360   const PetscInt         *is,*A_ci,*A_cj,*B_ci,*B_cj,*row = NULL,*ltog=NULL;
361   PetscInt               nis=iscoloring->n,nctot,*cols;
362   PetscInt               *rowhit,cstart,cend,colb;
363   IS                     *isa;
364   ISLocalToGlobalMapping map=mat->cmap->mapping;
365   PetscInt               ctype=c->ctype;
366   Mat                    A,B;
367   //Mat                    A=aij->A,B=aij->B;
368   //Mat_SeqAIJ             *spA=(Mat_SeqAIJ*)A->data,*spB=(Mat_SeqAIJ*)B->data;
369   //PetscScalar            *A_val=spA->a,*B_val=spB->a;
370   PetscScalar            *A_val,*B_val;
371   PetscInt               spidx;
372   PetscInt               *spidxA,*spidxB,nz,bs,bs2;
373   PetscScalar            **valaddrhit;
374   MatEntry               *Jentry;
375   PetscBool              isBAIJ;
376 #if defined(PETSC_USE_CTABLE)
377   PetscTable             colmap=NULL;
378 #else
379   PetscInt               *colmap=NULL;     /* local col number of off-diag col */
380 #endif
381 
382   PetscFunctionBegin;
383   if (ctype == IS_COLORING_GHOSTED) {
384     if (!map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping");
385     ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr);
386   }
387 
388   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
389   ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);
390   if (!isBAIJ) {
391     bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */
392     Mat_MPIAIJ             *aij=(Mat_MPIAIJ*)mat->data;
393     A=aij->A; B=aij->B;
394     Mat_SeqAIJ             *spA=(Mat_SeqAIJ*)A->data,*spB=(Mat_SeqAIJ*)B->data;
395     A_val=spA->a; B_val=spB->a;
396     nz         = spA->nz + spB->nz; /* total nonzero entries of mat */
397     if (!aij->colmap) {
398       ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
399       colmap = aij->colmap;
400     }
401     ierr = MatGetColumnIJ_SeqAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr);
402     ierr = MatGetColumnIJ_SeqAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr);
403   } else {
404     Mat_MPIBAIJ             *aij=(Mat_MPIBAIJ*)mat->data;
405     A=aij->A; B=aij->B;
406     Mat_SeqBAIJ             *spA=(Mat_SeqBAIJ*)A->data,*spB=(Mat_SeqBAIJ*)B->data;
407     A_val=spA->a; B_val=spB->a;
408     nz         = spA->nz + spB->nz; /* total nonzero entries of mat */
409     if (!aij->colmap) {
410       ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
411       colmap = aij->colmap;
412     }
413     ierr = MatGetColumnIJ_SeqBAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr);
414     ierr = MatGetColumnIJ_SeqBAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr);
415   }
416 
417   m         = mat->rmap->n/bs;
418   cstart    = mat->cmap->rstart/bs;
419   cend      = mat->cmap->rend/bs;
420   c->M      = mat->rmap->N/bs;         /* set the global rows and columns and local rows */
421   c->N      = mat->cmap->N/bs;
422   c->m      = m;
423   c->rstart = mat->rmap->rstart/bs;
424 
425   c->ncolors = nis;
426   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
427   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
428   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
429   ierr       = PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt));CHKERRQ(ierr);
430 
431   //nz         = spA->nz + spB->nz; /* total nonzero entries of mat */
432   ierr       = PetscMalloc(nz*sizeof(MatEntry),&Jentry);CHKERRQ(ierr);
433   ierr       = PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry));CHKERRQ(ierr);
434   c->matentry = Jentry;
435 
436   /* Allow access to data structures of local part of matrix
437    - creates aij->colmap which maps global column number to local number in part B */
438   //if (!aij->colmap) {
439   //  ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
440   //}
441   //ierr = MatGetColumnIJ_SeqAIJ_Color(aij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr);
442   //ierr = MatGetColumnIJ_SeqAIJ_Color(aij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr);
443 
444   ierr = PetscMalloc2(m+1,PetscInt,&rowhit,m+1,PetscScalar*,&valaddrhit);CHKERRQ(ierr);
445   nz = 0;
446   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
447   for (i=0; i<nis; i++) { /* for each local color */
448     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
449     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
450 
451     c->ncolumns[i] = n; /* local number of columns of this color on this process */
452     if (n) {
453       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
454       ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr);
455       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
456       /* convert global column indices to local ones! */
457 
458     } else {
459       c->columns[i] = 0;
460     }
461 
462     if (ctype == IS_COLORING_GLOBAL) {
463       /* Determine nctot, the total (parallel) number of columns of this color */
464       ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
465       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
466 
467       /* ncolsonproc[j]: local ncolumns on proc[j] of this color */
468       ierr  = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
469       ierr  = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
470       nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
471       if (!nctot) {
472         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
473       }
474 
475       disp[0] = 0;
476       for (j=1; j<size; j++) {
477         disp[j] = disp[j-1] + ncolsonproc[j-1];
478       }
479 
480       /* Get cols, the complete list of columns for this color on each process */
481       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
482       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
483       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
484     } else if (ctype == IS_COLORING_GHOSTED) {
485       /* Determine local number of columns of this color on this process, including ghost points */
486       nctot = n;
487       ierr  = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
488       ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
489     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
490 
491     /* Mark all rows affect by these columns */
492     ierr = PetscMemzero(rowhit,m*sizeof(PetscInt));CHKERRQ(ierr);
493     bs2     = bs*bs;
494     nrows_i = 0;
495     for (j=0; j<nctot; j++) { /* loop over columns*/
496       if (ctype == IS_COLORING_GHOSTED) {
497         col = ltog[cols[j]];
498       } else {
499         col = cols[j];
500       }
501       if (col >= cstart && col < cend) { /* column is in diagonal block of matrix A */
502         row      = A_cj + A_ci[col-cstart];
503         nrows    = A_ci[col-cstart+1] - A_ci[col-cstart];
504         nrows_i += nrows;
505         /* loop over columns of A marking them in rowhit */
506         for (k=0; k<nrows; k++) {
507           /* set valaddrhit for part A */
508           spidx            = spidxA[A_ci[col-cstart] + k];
509           valaddrhit[*row] = &A_val[bs2*spidx];
510           rowhit[*row++]   = col - cstart + 1; /* local column index */
511         }
512       } else { /* column is in off-diagonal block of matrix B */
513 #if defined(PETSC_USE_CTABLE)
514         //ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr);
515         ierr = PetscTableFind(colmap,col+1,&colb);CHKERRQ(ierr);
516         colb--;
517 #else
518         //colb = aij->colmap[col] - 1; /* local column index */
519         colb = colmap[col] - 1; /* local column index */
520 #endif
521         if (colb == -1) {
522           nrows = 0;
523         } else {
524           colb  = colb/bs;
525           row   = B_cj + B_ci[colb];
526           nrows = B_ci[colb+1] - B_ci[colb];
527         }
528         nrows_i += nrows;
529         /* loop over columns of B marking them in rowhit */
530         for (k=0; k<nrows; k++) {
531           /* set valaddrhit for part B */
532           spidx            = spidxB[B_ci[colb] + k];
533           valaddrhit[*row] = &B_val[bs2*spidx];
534           rowhit[*row++]   = colb + 1 + cend - cstart; /* local column index */
535         }
536       }
537     }
538     c->nrows[i] = nrows_i;
539 
540     for (j=0; j<m; j++) {
541       if (rowhit[j]) {
542         Jentry[nz].row     = j;              /* local row index */
543         Jentry[nz].col     = rowhit[j] - 1;  /* local column index */
544         Jentry[nz].valaddr = valaddrhit[j];  /* address of mat value for this entry */
545         nz++;
546       }
547     }
548     ierr = PetscFree(cols);CHKERRQ(ierr);
549   }
550   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
551   //if (nz != spA->nz + spB->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz,spA->nz+spB->nz);
552 
553   ierr = PetscFree2(rowhit,valaddrhit);CHKERRQ(ierr);
554   if (isBAIJ) {
555     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr);
556     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr);
557     ierr = PetscMalloc(bs*mat->cmap->n*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr);
558     mat->ops->fdcoloringapply = MatFDColoringApply_BAIJ_new;
559   } else {
560     ierr = MatRestoreColumnIJ_SeqAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr);
561     ierr = MatRestoreColumnIJ_SeqAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr);
562     mat->ops->fdcoloringapply = MatFDColoringApply_AIJ_new;
563   }
564 
565   if (ctype == IS_COLORING_GHOSTED) {
566     ierr = ISLocalToGlobalMappingRestoreIndices(map,&ltog);CHKERRQ(ierr);
567   }
568   PetscFunctionReturn(0);
569 }
570 
571 /*------------------------------------------------------*/
572 #undef __FUNCT__
573 #define __FUNCT__ "MatFDColoringCreate_MPIAIJ"
574 PetscErrorCode MatFDColoringCreate_MPIAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
575 {
576   Mat_MPIAIJ             *aij = (Mat_MPIAIJ*)mat->data;
577   PetscErrorCode         ierr;
578   PetscMPIInt            size,*ncolsonproc,*disp,nn;
579   PetscInt               i,n,nrows,j,k,m,ncols,col;
580   const PetscInt         *is,*A_ci,*A_cj,*B_ci,*B_cj,*rows = 0,*ltog;
581   PetscInt               nis = iscoloring->n,nctot,*cols;
582   PetscInt               *rowhit,M,cstart,cend,colb;
583   PetscInt               *columnsforrow,l;
584   IS                     *isa;
585   PetscBool              done,flg;
586   ISLocalToGlobalMapping map   = mat->cmap->mapping;
587   PetscInt               ctype=c->ctype;
588   PetscBool              new_impl=PETSC_FALSE;
589 
590   PetscFunctionBegin;
591   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
592   if (new_impl){
593     ierr =  MatFDColoringCreate_MPIAIJ_new(mat,iscoloring,c);CHKERRQ(ierr);
594     PetscFunctionReturn(0);
595   }
596   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping");
597 
598   if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr);}
599   else     ltog = NULL;
600   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
601 
602   M         = mat->rmap->n;
603   cstart    = mat->cmap->rstart;
604   cend      = mat->cmap->rend;
605   c->M      = mat->rmap->N;         /* set the global rows and columns and local rows */
606   c->N      = mat->cmap->N;
607   c->m      = mat->rmap->n;
608   c->rstart = mat->rmap->rstart;
609 
610   c->ncolors = nis;
611   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
612   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
613   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
614   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
615   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
616   ierr       = PetscLogObjectMemory((PetscObject)c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
617 
618   /* Allow access to data structures of local part of matrix */
619   if (!aij->colmap) {
620     ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
621   }
622   ierr = MatGetColumnIJ(aij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
623   ierr = MatGetColumnIJ(aij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
624 
625   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
626   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
627 
628   for (i=0; i<nis; i++) {
629     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
630     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
631 
632     c->ncolumns[i] = n; /* local number of columns of this color on this process */
633     if (n) {
634       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
635       ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr);
636       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
637     } else {
638       c->columns[i] = 0;
639     }
640 
641     if (ctype == IS_COLORING_GLOBAL) {
642       /* Determine the total (parallel) number of columns of this color */
643       ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
644       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
645 
646       ierr  = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
647       ierr  = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
648       nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
649       if (!nctot) {
650         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
651       }
652 
653       disp[0] = 0;
654       for (j=1; j<size; j++) {
655         disp[j] = disp[j-1] + ncolsonproc[j-1];
656       }
657 
658       /* Get complete list of columns for color on each processor */
659       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
660       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
661       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
662     } else if (ctype == IS_COLORING_GHOSTED) {
663       /* Determine local number of columns of this color on this process, including ghost points */
664       nctot = n;
665       ierr  = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
666       ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
667     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
668 
669     /*
670        Mark all rows affect by these columns
671     */
672     /* Temporary option to allow for debugging/testing */
673     flg  = PETSC_FALSE;
674     ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
675     if (!flg) { /*-----------------------------------------------------------------------------*/
676       /* crude, fast version */
677       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
678       /* loop over columns*/
679       for (j=0; j<nctot; j++) {
680         if (ctype == IS_COLORING_GHOSTED) {
681           col = ltog[cols[j]];
682         } else {
683           col = cols[j];
684         }
685         if (col >= cstart && col < cend) {
686           /* column is in diagonal block of matrix */
687           rows = A_cj + A_ci[col-cstart];
688           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
689         } else {
690 #if defined(PETSC_USE_CTABLE)
691           ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr);
692           colb--;
693 #else
694           colb = aij->colmap[col] - 1;
695 #endif
696           if (colb == -1) {
697             m = 0;
698           } else {
699             rows = B_cj + B_ci[colb];
700             m    = B_ci[colb+1] - B_ci[colb];
701           }
702         }
703         /* loop over columns marking them in rowhit */
704         for (k=0; k<m; k++) {
705           rowhit[*rows++] = col + 1;
706         }
707       }
708 
709       /* count the number of hits */
710       nrows = 0;
711       for (j=0; j<M; j++) {
712         if (rowhit[j]) nrows++;
713       }
714       c->nrows[i] = nrows;
715       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
716       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
717       ierr        = PetscLogObjectMemory((PetscObject)c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
718       nrows       = 0;
719       for (j=0; j<M; j++) {
720         if (rowhit[j]) {
721           c->rows[i][nrows]          = j;              /* local row index */
722           c->columnsforrow[i][nrows] = rowhit[j] - 1;  /* global column index */
723           nrows++;
724         }
725       }
726     } else { /*-------------------------------------------------------------------------------*/
727       /* slow version, using rowhit as a linked list */
728       PetscInt currentcol,fm,mfm;
729       rowhit[M] = M;
730       nrows     = 0;
731       /* loop over columns*/
732       for (j=0; j<nctot; j++) {
733         if (ctype == IS_COLORING_GHOSTED) {
734           col = ltog[cols[j]];
735         } else {
736           col = cols[j];
737         }
738         if (col >= cstart && col < cend) {
739           /* column is in diagonal block of matrix */
740           rows = A_cj + A_ci[col-cstart];
741           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
742         } else {
743 #if defined(PETSC_USE_CTABLE)
744           ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr);
745           colb--;
746 #else
747           colb = aij->colmap[col] - 1;
748 #endif
749           if (colb == -1) {
750             m = 0;
751           } else {
752             rows = B_cj + B_ci[colb];
753             m    = B_ci[colb+1] - B_ci[colb];
754           }
755         }
756 
757         /* loop over columns marking them in rowhit */
758         fm = M;    /* fm points to first entry in linked list */
759         for (k=0; k<m; k++) {
760           currentcol = *rows++;
761           /* is it already in the list? */
762           do {
763             mfm = fm;
764             fm  = rowhit[fm];
765           } while (fm < currentcol);
766           /* not in list so add it */
767           if (fm != currentcol) {
768             nrows++;
769             columnsforrow[currentcol] = col;
770             /* next three lines insert new entry into linked list */
771             rowhit[mfm]        = currentcol;
772             rowhit[currentcol] = fm;
773             fm                 = currentcol;
774             /* fm points to present position in list since we know the columns are sorted */
775           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
776         }
777       }
778       c->nrows[i] = nrows;
779 
780       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
781       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
782       ierr = PetscLogObjectMemory((PetscObject)c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
783       /* now store the linked list of rows into c->rows[i] */
784       nrows = 0;
785       fm    = rowhit[M];
786       do {
787         c->rows[i][nrows]            = fm;
788         c->columnsforrow[i][nrows++] = columnsforrow[fm];
789         fm                           = rowhit[fm];
790       } while (fm < M);
791     } /* ---------------------------------------------------------------------------------------*/
792     ierr = PetscFree(cols);CHKERRQ(ierr);
793   }
794 
795   /* Optimize by adding the vscale, and scaleforrow[][] fields */
796   /*
797        vscale will contain the "diagonal" on processor scalings followed by the off processor
798   */
799   if (ctype == IS_COLORING_GLOBAL) {
800     ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),aij->A->rmap->n,PETSC_DETERMINE,aij->B->cmap->n,aij->garray,&c->vscale);CHKERRQ(ierr);
801     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
802     for (k=0; k<c->ncolors; k++) {
803       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
804       for (l=0; l<c->nrows[k]; l++) {
805         col = c->columnsforrow[k][l];
806         if (col >= cstart && col < cend) {
807           /* column is in diagonal block of matrix */
808           colb = col - cstart;
809         } else {
810           /* column  is in "off-processor" part */
811 #if defined(PETSC_USE_CTABLE)
812           ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr);
813           colb--;
814 #else
815           colb = aij->colmap[col] - 1;
816 #endif
817           colb += cend - cstart;
818         }
819         c->vscaleforrow[k][l] = colb;
820       }
821     }
822   } else if (ctype == IS_COLORING_GHOSTED) {
823     /* Get gtol mapping */
824     PetscInt N = mat->cmap->N,nlocal,*gtol;
825     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
826     for (i=0; i<N; i++) gtol[i] = -1;
827     ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr);
828     for (i=0; i<nlocal; i++) gtol[ltog[i]] = i;
829 
830     c->vscale = 0; /* will be created in MatFDColoringApply() */
831     ierr      = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
832     for (k=0; k<c->ncolors; k++) {
833       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
834       for (l=0; l<c->nrows[k]; l++) {
835         col = c->columnsforrow[k][l];      /* global column index */
836         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
837       }
838     }
839     ierr = PetscFree(gtol);CHKERRQ(ierr);
840   }
841   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
842 
843   ierr = PetscFree(rowhit);CHKERRQ(ierr);
844   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
845   ierr = MatRestoreColumnIJ(aij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
846   ierr = MatRestoreColumnIJ(aij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
847   if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,&ltog);CHKERRQ(ierr);}
848   PetscFunctionReturn(0);
849 }
850 
851 
852 
853 
854 
855 
856