xref: /petsc/src/ksp/pc/impls/gamg/util.c (revision 6618991c9398194553537e62f98096f62e64fc5a)
1*6618991cSMark Adams /*
2*6618991cSMark Adams  GAMG geometric-algebric multigrid PC - Mark Adams 2011
3*6618991cSMark Adams  */
4*6618991cSMark Adams #include <petsc/private/matimpl.h>
5*6618991cSMark Adams #include <../src/ksp/pc/impls/gamg/gamg.h>           /*I "petscpc.h" I*/
6*6618991cSMark Adams #include <petsc/private/kspimpl.h>
7*6618991cSMark Adams 
8*6618991cSMark Adams #undef __FUNCT__
9*6618991cSMark Adams #define __FUNCT__ "MatCollapseRow"
10*6618991cSMark Adams /*
11*6618991cSMark Adams    Produces a set of block column indices of the matrix row, one for each block represented in the original row
12*6618991cSMark Adams 
13*6618991cSMark Adams    n - the number of block indices in cc[]
14*6618991cSMark Adams    cc - the block indices (must be large enough to contain the indices)
15*6618991cSMark Adams */
16*6618991cSMark Adams PETSC_STATIC_INLINE PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc)
17*6618991cSMark Adams {
18*6618991cSMark Adams   PetscInt       cnt = -1,nidx,j;
19*6618991cSMark Adams   const PetscInt *idx;
20*6618991cSMark Adams   PetscErrorCode ierr;
21*6618991cSMark Adams 
22*6618991cSMark Adams   PetscFunctionBegin;
23*6618991cSMark Adams   ierr = MatGetRow(Amat,row,&nidx,&idx,NULL);CHKERRQ(ierr);
24*6618991cSMark Adams   if (nidx) {
25*6618991cSMark Adams     cnt = 0;
26*6618991cSMark Adams     cc[cnt] = idx[0]/bs;
27*6618991cSMark Adams     for (j=1; j<nidx; j++) {
28*6618991cSMark Adams       if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs;
29*6618991cSMark Adams     }
30*6618991cSMark Adams   }
31*6618991cSMark Adams   ierr = MatRestoreRow(Amat,row,&nidx,&idx,NULL);CHKERRQ(ierr);
32*6618991cSMark Adams   *n = cnt+1;
33*6618991cSMark Adams   PetscFunctionReturn(0);
34*6618991cSMark Adams }
35*6618991cSMark Adams 
36*6618991cSMark Adams #undef __FUNCT__
37*6618991cSMark Adams #define __FUNCT__ "MatCollapseRows"
38*6618991cSMark Adams /*
39*6618991cSMark Adams     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
40*6618991cSMark Adams 
41*6618991cSMark Adams     ncollapsed - the number of block indices
42*6618991cSMark Adams     collapsed - the block indices (must be large enough to contain the indices)
43*6618991cSMark Adams */
44*6618991cSMark Adams PETSC_STATIC_INLINE PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
45*6618991cSMark Adams {
46*6618991cSMark Adams   PetscInt       i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp;
47*6618991cSMark Adams   PetscErrorCode ierr;
48*6618991cSMark Adams 
49*6618991cSMark Adams   PetscFunctionBegin;
50*6618991cSMark Adams   ierr = MatCollapseRow(Amat,start,bs,&nprev,cprev);CHKERRQ(ierr);
51*6618991cSMark Adams   for (i=start+1; i<start+bs; i++) {
52*6618991cSMark Adams     ierr  = MatCollapseRow(Amat,i,bs,&ncur,ccur);CHKERRQ(ierr);
53*6618991cSMark Adams     ierr  = PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged);CHKERRQ(ierr);
54*6618991cSMark Adams     cprevtmp = cprev; cprev = merged; merged = cprevtmp;
55*6618991cSMark Adams   }
56*6618991cSMark Adams   *ncollapsed = nprev;
57*6618991cSMark Adams   if (collapsed) *collapsed  = cprev;
58*6618991cSMark Adams   PetscFunctionReturn(0);
59*6618991cSMark Adams }
60*6618991cSMark Adams 
61*6618991cSMark Adams 
62*6618991cSMark Adams /* -------------------------------------------------------------------------- */
63*6618991cSMark Adams /*
64*6618991cSMark Adams    PCGAMGCreateGraph - create simple scaled scalar graph from matrix
65*6618991cSMark Adams 
66*6618991cSMark Adams  Input Parameter:
67*6618991cSMark Adams  . Amat - matrix
68*6618991cSMark Adams  Output Parameter:
69*6618991cSMark Adams  . a_Gmaat - eoutput scalar graph (symmetric?)
70*6618991cSMark Adams  */
71*6618991cSMark Adams #undef __FUNCT__
72*6618991cSMark Adams #define __FUNCT__ "PCGAMGCreateGraph"
73*6618991cSMark Adams PetscErrorCode PCGAMGCreateGraph(Mat Amat, Mat *a_Gmat)
74*6618991cSMark Adams {
75*6618991cSMark Adams   PetscErrorCode ierr;
76*6618991cSMark Adams   PetscInt       Istart,Iend,Ii,i,jj,kk,ncols,nloc,NN,MM,bs;
77*6618991cSMark Adams   MPI_Comm       comm;
78*6618991cSMark Adams   Mat            Gmat;
79*6618991cSMark Adams   MatType        mtype;
80*6618991cSMark Adams 
81*6618991cSMark Adams   PetscFunctionBegin;
82*6618991cSMark Adams   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
83*6618991cSMark Adams   ierr = MatGetOwnershipRange(Amat, &Istart, &Iend);CHKERRQ(ierr);
84*6618991cSMark Adams   ierr = MatGetSize(Amat, &MM, &NN);CHKERRQ(ierr);
85*6618991cSMark Adams   ierr = MatGetBlockSize(Amat, &bs);CHKERRQ(ierr);
86*6618991cSMark Adams   nloc = (Iend-Istart)/bs;
87*6618991cSMark Adams 
88*6618991cSMark Adams #if defined PETSC_GAMG_USE_LOG
89*6618991cSMark Adams   ierr = PetscLogEventBegin(petsc_gamg_setup_events[GRAPH],0,0,0,0);CHKERRQ(ierr);
90*6618991cSMark Adams #endif
91*6618991cSMark Adams 
92*6618991cSMark Adams   if (bs > 1) {
93*6618991cSMark Adams     const PetscScalar *vals;
94*6618991cSMark Adams     const PetscInt    *idx;
95*6618991cSMark Adams     PetscInt          *d_nnz, *o_nnz,*blockmask = NULL,maskcnt,*w0,*w1,*w2;
96*6618991cSMark Adams     PetscBool         ismpiaij,isseqaij;
97*6618991cSMark Adams 
98*6618991cSMark Adams     /*
99*6618991cSMark Adams        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
100*6618991cSMark Adams     */
101*6618991cSMark Adams 
102*6618991cSMark Adams     ierr = PetscObjectTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
103*6618991cSMark Adams     ierr = PetscObjectTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr);
104*6618991cSMark Adams 
105*6618991cSMark Adams     ierr = PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz);CHKERRQ(ierr);
106*6618991cSMark Adams 
107*6618991cSMark Adams     if (isseqaij) {
108*6618991cSMark Adams       PetscInt       max_d_nnz;
109*6618991cSMark Adams 
110*6618991cSMark Adams       /*
111*6618991cSMark Adams           Determine exact preallocation count for (sequential) scalar matrix
112*6618991cSMark Adams       */
113*6618991cSMark Adams       ierr = MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz);CHKERRQ(ierr);
114*6618991cSMark Adams       max_d_nnz = PetscMin(nloc,bs*max_d_nnz);CHKERRQ(ierr);
115*6618991cSMark Adams       ierr = PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2);CHKERRQ(ierr);
116*6618991cSMark Adams       for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) {
117*6618991cSMark Adams         ierr = MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL);CHKERRQ(ierr);
118*6618991cSMark Adams       }
119*6618991cSMark Adams       ierr = PetscFree3(w0,w1,w2);CHKERRQ(ierr);
120*6618991cSMark Adams 
121*6618991cSMark Adams     } else if (ismpiaij) {
122*6618991cSMark Adams       Mat            Daij,Oaij;
123*6618991cSMark Adams       const PetscInt *garray;
124*6618991cSMark Adams       PetscInt       max_d_nnz;
125*6618991cSMark Adams 
126*6618991cSMark Adams       ierr = MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray);CHKERRQ(ierr);
127*6618991cSMark Adams 
128*6618991cSMark Adams       /*
129*6618991cSMark Adams           Determine exact preallocation count for diagonal block portion of scalar matrix
130*6618991cSMark Adams       */
131*6618991cSMark Adams       ierr = MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz);CHKERRQ(ierr);
132*6618991cSMark Adams       max_d_nnz = PetscMin(nloc,bs*max_d_nnz);CHKERRQ(ierr);
133*6618991cSMark Adams       ierr = PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2);CHKERRQ(ierr);
134*6618991cSMark Adams       for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
135*6618991cSMark Adams         ierr = MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL);CHKERRQ(ierr);
136*6618991cSMark Adams       }
137*6618991cSMark Adams       ierr = PetscFree3(w0,w1,w2);CHKERRQ(ierr);
138*6618991cSMark Adams 
139*6618991cSMark Adams       /*
140*6618991cSMark Adams          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
141*6618991cSMark Adams       */
142*6618991cSMark Adams       for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
143*6618991cSMark Adams         o_nnz[jj] = 0;
144*6618991cSMark Adams         for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
145*6618991cSMark Adams           ierr = MatGetRow(Oaij,Ii+kk,&ncols,0,0);CHKERRQ(ierr);
146*6618991cSMark Adams           o_nnz[jj] += ncols;
147*6618991cSMark Adams           ierr = MatRestoreRow(Oaij,Ii+kk,&ncols,0,0);CHKERRQ(ierr);
148*6618991cSMark Adams         }
149*6618991cSMark Adams         if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
150*6618991cSMark Adams       }
151*6618991cSMark Adams 
152*6618991cSMark Adams     } else {
153*6618991cSMark Adams       /*
154*6618991cSMark Adams 
155*6618991cSMark Adams        This is O(nloc*nloc/bs) work!
156*6618991cSMark Adams 
157*6618991cSMark Adams        This is accurate for the "diagonal" block of the matrix but will be grossly high for the
158*6618991cSMark Adams        off diagonal block most of the time but could be too low for the off-diagonal.
159*6618991cSMark Adams 
160*6618991cSMark Adams        This should be fixed to be accurate for the off-diagonal portion. Cannot just use a mask
161*6618991cSMark Adams        for the off-diagonal portion since for huge matrices that would require too much memory per
162*6618991cSMark Adams        MPI process.
163*6618991cSMark Adams       */
164*6618991cSMark Adams       ierr = PetscMalloc1(nloc, &blockmask);CHKERRQ(ierr);
165*6618991cSMark Adams       for (Ii = Istart, jj = 0; Ii < Iend; Ii += bs, jj++) {
166*6618991cSMark Adams         o_nnz[jj] = 0;
167*6618991cSMark Adams         ierr = PetscMemzero(blockmask,nloc*sizeof(PetscInt));CHKERRQ(ierr);
168*6618991cSMark Adams         for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
169*6618991cSMark Adams           ierr = MatGetRow(Amat,Ii+kk,&ncols,&idx,0);CHKERRQ(ierr);
170*6618991cSMark Adams           for (i=0; i<ncols; i++) {
171*6618991cSMark Adams             if (idx[i] >= Istart && idx[i] < Iend) {
172*6618991cSMark Adams               blockmask[(idx[i] - Istart)/bs] = 1;
173*6618991cSMark Adams             }
174*6618991cSMark Adams           }
175*6618991cSMark Adams           if (ncols > o_nnz[jj]) {
176*6618991cSMark Adams             o_nnz[jj] = ncols;
177*6618991cSMark Adams             if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
178*6618991cSMark Adams           }
179*6618991cSMark Adams           ierr = MatRestoreRow(Amat,Ii+kk,&ncols,&idx,0);CHKERRQ(ierr);
180*6618991cSMark Adams         }
181*6618991cSMark Adams         maskcnt = 0;
182*6618991cSMark Adams         for (i=0; i<nloc; i++) {
183*6618991cSMark Adams           if (blockmask[i]) maskcnt++;
184*6618991cSMark Adams         }
185*6618991cSMark Adams         d_nnz[jj] = maskcnt;
186*6618991cSMark Adams       }
187*6618991cSMark Adams       ierr = PetscFree(blockmask);CHKERRQ(ierr);
188*6618991cSMark Adams     }
189*6618991cSMark Adams 
190*6618991cSMark Adams     /* get scalar copy (norms) of matrix -- AIJ specific!!! */
191*6618991cSMark Adams     ierr = MatGetType(Amat,&mtype);CHKERRQ(ierr);
192*6618991cSMark Adams     ierr = MatCreate(comm, &Gmat);CHKERRQ(ierr);
193*6618991cSMark Adams     ierr = MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
194*6618991cSMark Adams     ierr = MatSetBlockSizes(Gmat, 1, 1);CHKERRQ(ierr);
195*6618991cSMark Adams     ierr = MatSetType(Gmat, mtype);CHKERRQ(ierr);
196*6618991cSMark Adams     ierr = MatSeqAIJSetPreallocation(Gmat,0,d_nnz);CHKERRQ(ierr);
197*6618991cSMark Adams     ierr = MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
198*6618991cSMark Adams     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
199*6618991cSMark Adams 
200*6618991cSMark Adams     for (Ii = Istart; Ii < Iend; Ii++) {
201*6618991cSMark Adams       PetscInt dest_row = Ii/bs;
202*6618991cSMark Adams       ierr = MatGetRow(Amat,Ii,&ncols,&idx,&vals);CHKERRQ(ierr);
203*6618991cSMark Adams       for (jj=0; jj<ncols; jj++) {
204*6618991cSMark Adams         PetscInt    dest_col = idx[jj]/bs;
205*6618991cSMark Adams         PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
206*6618991cSMark Adams         ierr = MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES);CHKERRQ(ierr);
207*6618991cSMark Adams       }
208*6618991cSMark Adams       ierr = MatRestoreRow(Amat,Ii,&ncols,&idx,&vals);CHKERRQ(ierr);
209*6618991cSMark Adams     }
210*6618991cSMark Adams     ierr = MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
211*6618991cSMark Adams     ierr = MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
212*6618991cSMark Adams   } else {
213*6618991cSMark Adams     /* just copy scalar matrix - abs() not taken here but scaled later */
214*6618991cSMark Adams     ierr = MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat);CHKERRQ(ierr);
215*6618991cSMark Adams   }
216*6618991cSMark Adams 
217*6618991cSMark Adams #if defined PETSC_GAMG_USE_LOG
218*6618991cSMark Adams   ierr = PetscLogEventEnd(petsc_gamg_setup_events[GRAPH],0,0,0,0);CHKERRQ(ierr);
219*6618991cSMark Adams #endif
220*6618991cSMark Adams 
221*6618991cSMark Adams   *a_Gmat = Gmat;
222*6618991cSMark Adams   PetscFunctionReturn(0);
223*6618991cSMark Adams }
224*6618991cSMark Adams 
225*6618991cSMark Adams /* -------------------------------------------------------------------------- */
226*6618991cSMark Adams /*
227*6618991cSMark Adams    PCGAMGFilterGraph - filter (remove zero and possibly small values from the) graph and symetrize if needed
228*6618991cSMark Adams 
229*6618991cSMark Adams  Input Parameter:
230*6618991cSMark Adams  . vfilter - threshold paramter [0,1)
231*6618991cSMark Adams  . symm - symetrize?
232*6618991cSMark Adams  In/Output Parameter:
233*6618991cSMark Adams  . a_Gmat - original graph
234*6618991cSMark Adams  */
235*6618991cSMark Adams #undef __FUNCT__
236*6618991cSMark Adams #define __FUNCT__ "PCGAMGFilterGraph"
237*6618991cSMark Adams PetscErrorCode PCGAMGFilterGraph(Mat *a_Gmat,PetscReal vfilter,PetscBool symm)
238*6618991cSMark Adams {
239*6618991cSMark Adams   PetscErrorCode    ierr;
240*6618991cSMark Adams   PetscInt          Istart,Iend,Ii,jj,ncols,nnz0,nnz1, NN, MM, nloc;
241*6618991cSMark Adams   PetscMPIInt       rank;
242*6618991cSMark Adams   Mat               Gmat  = *a_Gmat, tGmat, matTrans;
243*6618991cSMark Adams   MPI_Comm          comm;
244*6618991cSMark Adams   const PetscScalar *vals;
245*6618991cSMark Adams   const PetscInt    *idx;
246*6618991cSMark Adams   PetscInt          *d_nnz, *o_nnz;
247*6618991cSMark Adams   Vec               diag;
248*6618991cSMark Adams   MatType           mtype;
249*6618991cSMark Adams 
250*6618991cSMark Adams   PetscFunctionBegin;
251*6618991cSMark Adams #if defined PETSC_GAMG_USE_LOG
252*6618991cSMark Adams   ierr = PetscLogEventBegin(petsc_gamg_setup_events[GRAPH],0,0,0,0);CHKERRQ(ierr);
253*6618991cSMark Adams #endif
254*6618991cSMark Adams   /* scale Gmat for all values between -1 and 1 */
255*6618991cSMark Adams   ierr = MatCreateVecs(Gmat, &diag, 0);CHKERRQ(ierr);
256*6618991cSMark Adams   ierr = MatGetDiagonal(Gmat, diag);CHKERRQ(ierr);
257*6618991cSMark Adams   ierr = VecReciprocal(diag);CHKERRQ(ierr);
258*6618991cSMark Adams   ierr = VecSqrtAbs(diag);CHKERRQ(ierr);
259*6618991cSMark Adams   ierr = MatDiagonalScale(Gmat, diag, diag);CHKERRQ(ierr);
260*6618991cSMark Adams   ierr = VecDestroy(&diag);CHKERRQ(ierr);
261*6618991cSMark Adams 
262*6618991cSMark Adams   if (vfilter < 0.0 && !symm) {
263*6618991cSMark Adams     /* Just use the provided matrix as the graph but make all values positive */
264*6618991cSMark Adams     Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)Gmat->data;
265*6618991cSMark Adams     MatInfo     info;
266*6618991cSMark Adams     PetscScalar *avals;
267*6618991cSMark Adams     PetscMPIInt size;
268*6618991cSMark Adams 
269*6618991cSMark Adams     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)Gmat),&size);CHKERRQ(ierr);
270*6618991cSMark Adams     if (size == 1) {
271*6618991cSMark Adams       ierr = MatGetInfo(Gmat,MAT_LOCAL,&info);CHKERRQ(ierr);
272*6618991cSMark Adams       ierr = MatSeqAIJGetArray(Gmat,&avals);CHKERRQ(ierr);
273*6618991cSMark Adams       for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
274*6618991cSMark Adams       ierr = MatSeqAIJRestoreArray(Gmat,&avals);CHKERRQ(ierr);
275*6618991cSMark Adams     } else {
276*6618991cSMark Adams       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
277*6618991cSMark Adams       ierr = MatSeqAIJGetArray(aij->A,&avals);CHKERRQ(ierr);
278*6618991cSMark Adams       for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
279*6618991cSMark Adams       ierr = MatSeqAIJRestoreArray(aij->A,&avals);CHKERRQ(ierr);
280*6618991cSMark Adams       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
281*6618991cSMark Adams       ierr = MatSeqAIJGetArray(aij->B,&avals);CHKERRQ(ierr);
282*6618991cSMark Adams       for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
283*6618991cSMark Adams       ierr = MatSeqAIJRestoreArray(aij->B,&avals);CHKERRQ(ierr);
284*6618991cSMark Adams     }
285*6618991cSMark Adams #if defined PETSC_GAMG_USE_LOG
286*6618991cSMark Adams     ierr = PetscLogEventEnd(petsc_gamg_setup_events[GRAPH],0,0,0,0);CHKERRQ(ierr);
287*6618991cSMark Adams #endif
288*6618991cSMark Adams     PetscFunctionReturn(0);
289*6618991cSMark Adams   }
290*6618991cSMark Adams 
291*6618991cSMark Adams   ierr = PetscObjectGetComm((PetscObject)Gmat,&comm);CHKERRQ(ierr);
292*6618991cSMark Adams   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
293*6618991cSMark Adams   ierr = MatGetOwnershipRange(Gmat, &Istart, &Iend);CHKERRQ(ierr);
294*6618991cSMark Adams   nloc = Iend - Istart;
295*6618991cSMark Adams   ierr = MatGetSize(Gmat, &MM, &NN);CHKERRQ(ierr);
296*6618991cSMark Adams 
297*6618991cSMark Adams   if (symm) {
298*6618991cSMark Adams     ierr = MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans);CHKERRQ(ierr);
299*6618991cSMark Adams   }
300*6618991cSMark Adams 
301*6618991cSMark Adams   /* Determine upper bound on nonzeros needed in new filtered matrix */
302*6618991cSMark Adams   ierr = PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz);CHKERRQ(ierr);
303*6618991cSMark Adams   for (Ii = Istart, jj = 0; Ii < Iend; Ii++, jj++) {
304*6618991cSMark Adams     ierr      = MatGetRow(Gmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
305*6618991cSMark Adams     d_nnz[jj] = ncols;
306*6618991cSMark Adams     o_nnz[jj] = ncols;
307*6618991cSMark Adams     ierr      = MatRestoreRow(Gmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
308*6618991cSMark Adams     if (symm) {
309*6618991cSMark Adams       ierr       = MatGetRow(matTrans,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
310*6618991cSMark Adams       d_nnz[jj] += ncols;
311*6618991cSMark Adams       o_nnz[jj] += ncols;
312*6618991cSMark Adams       ierr       = MatRestoreRow(matTrans,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
313*6618991cSMark Adams     }
314*6618991cSMark Adams     if (d_nnz[jj] > nloc) d_nnz[jj] = nloc;
315*6618991cSMark Adams     if (o_nnz[jj] > (MM-nloc)) o_nnz[jj] = MM - nloc;
316*6618991cSMark Adams   }
317*6618991cSMark Adams   ierr = MatGetType(Gmat,&mtype);CHKERRQ(ierr);
318*6618991cSMark Adams   ierr = MatCreate(comm, &tGmat);CHKERRQ(ierr);
319*6618991cSMark Adams   ierr = MatSetSizes(tGmat,nloc,nloc,MM,MM);CHKERRQ(ierr);
320*6618991cSMark Adams   ierr = MatSetBlockSizes(tGmat, 1, 1);CHKERRQ(ierr);
321*6618991cSMark Adams   ierr = MatSetType(tGmat, mtype);CHKERRQ(ierr);
322*6618991cSMark Adams   ierr = MatSeqAIJSetPreallocation(tGmat,0,d_nnz);CHKERRQ(ierr);
323*6618991cSMark Adams   ierr = MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
324*6618991cSMark Adams   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
325*6618991cSMark Adams   if (symm) {
326*6618991cSMark Adams     ierr = MatDestroy(&matTrans);CHKERRQ(ierr);
327*6618991cSMark Adams   } else {
328*6618991cSMark Adams     /* all entries are generated locally so MatAssembly will be slightly faster for large process counts */
329*6618991cSMark Adams     ierr = MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
330*6618991cSMark Adams   }
331*6618991cSMark Adams 
332*6618991cSMark Adams   for (Ii = Istart, nnz0 = nnz1 = 0; Ii < Iend; Ii++) {
333*6618991cSMark Adams     ierr = MatGetRow(Gmat,Ii,&ncols,&idx,&vals);CHKERRQ(ierr);
334*6618991cSMark Adams     for (jj=0; jj<ncols; jj++,nnz0++) {
335*6618991cSMark Adams       PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
336*6618991cSMark Adams       if (PetscRealPart(sv) > vfilter) {
337*6618991cSMark Adams         nnz1++;
338*6618991cSMark Adams         if (symm) {
339*6618991cSMark Adams           sv  *= 0.5;
340*6618991cSMark Adams           ierr = MatSetValues(tGmat,1,&Ii,1,&idx[jj],&sv,ADD_VALUES);CHKERRQ(ierr);
341*6618991cSMark Adams           ierr = MatSetValues(tGmat,1,&idx[jj],1,&Ii,&sv,ADD_VALUES);CHKERRQ(ierr);
342*6618991cSMark Adams         } else {
343*6618991cSMark Adams           ierr = MatSetValues(tGmat,1,&Ii,1,&idx[jj],&sv,ADD_VALUES);CHKERRQ(ierr);
344*6618991cSMark Adams         }
345*6618991cSMark Adams       }
346*6618991cSMark Adams     }
347*6618991cSMark Adams     ierr = MatRestoreRow(Gmat,Ii,&ncols,&idx,&vals);CHKERRQ(ierr);
348*6618991cSMark Adams   }
349*6618991cSMark Adams   ierr = MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
350*6618991cSMark Adams   ierr = MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
351*6618991cSMark Adams 
352*6618991cSMark Adams #if defined PETSC_GAMG_USE_LOG
353*6618991cSMark Adams   ierr = PetscLogEventEnd(petsc_gamg_setup_events[GRAPH],0,0,0,0);CHKERRQ(ierr);
354*6618991cSMark Adams #endif
355*6618991cSMark Adams 
356*6618991cSMark Adams #if defined(PETSC_USE_INFO)
357*6618991cSMark Adams   {
358*6618991cSMark Adams     double t1 = (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, t2 = (!nloc) ? 1. : (double)nnz0/(double)nloc;
359*6618991cSMark Adams     ierr = PetscInfo4(*a_Gmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%D)\n",t1,vfilter,t2,MM);CHKERRQ(ierr);
360*6618991cSMark Adams   }
361*6618991cSMark Adams #endif
362*6618991cSMark Adams   ierr    = MatDestroy(&Gmat);CHKERRQ(ierr);
363*6618991cSMark Adams   *a_Gmat = tGmat;
364*6618991cSMark Adams   PetscFunctionReturn(0);
365*6618991cSMark Adams }
366*6618991cSMark Adams 
367*6618991cSMark Adams /* -------------------------------------------------------------------------- */
368*6618991cSMark Adams /*
369*6618991cSMark Adams    PCGAMGGetDataWithGhosts - hacks into Mat MPIAIJ so this must have > 1 pe
370*6618991cSMark Adams 
371*6618991cSMark Adams    Input Parameter:
372*6618991cSMark Adams    . Gmat - MPIAIJ matrix for scattters
373*6618991cSMark Adams    . data_sz - number of data terms per node (# cols in output)
374*6618991cSMark Adams    . data_in[nloc*data_sz] - column oriented data
375*6618991cSMark Adams    Output Parameter:
376*6618991cSMark Adams    . a_stride - numbrt of rows of output
377*6618991cSMark Adams    . a_data_out[stride*data_sz] - output data with ghosts
378*6618991cSMark Adams */
379*6618991cSMark Adams #undef __FUNCT__
380*6618991cSMark Adams #define __FUNCT__ "PCGAMGGetDataWithGhosts"
381*6618991cSMark Adams PetscErrorCode PCGAMGGetDataWithGhosts(Mat Gmat,PetscInt data_sz,PetscReal data_in[],PetscInt *a_stride,PetscReal **a_data_out)
382*6618991cSMark Adams {
383*6618991cSMark Adams   PetscErrorCode ierr;
384*6618991cSMark Adams   MPI_Comm       comm;
385*6618991cSMark Adams   Vec            tmp_crds;
386*6618991cSMark Adams   Mat_MPIAIJ     *mpimat = (Mat_MPIAIJ*)Gmat->data;
387*6618991cSMark Adams   PetscInt       nnodes,num_ghosts,dir,kk,jj,my0,Iend,nloc;
388*6618991cSMark Adams   PetscScalar    *data_arr;
389*6618991cSMark Adams   PetscReal      *datas;
390*6618991cSMark Adams   PetscBool      isMPIAIJ;
391*6618991cSMark Adams 
392*6618991cSMark Adams   PetscFunctionBegin;
393*6618991cSMark Adams   ierr = PetscObjectGetComm((PetscObject)Gmat,&comm);CHKERRQ(ierr);
394*6618991cSMark Adams   ierr      = PetscObjectTypeCompare((PetscObject)Gmat, MATMPIAIJ, &isMPIAIJ);CHKERRQ(ierr);
395*6618991cSMark Adams   ierr      = MatGetOwnershipRange(Gmat, &my0, &Iend);CHKERRQ(ierr);
396*6618991cSMark Adams   nloc      = Iend - my0;
397*6618991cSMark Adams   ierr      = VecGetLocalSize(mpimat->lvec, &num_ghosts);CHKERRQ(ierr);
398*6618991cSMark Adams   nnodes    = num_ghosts + nloc;
399*6618991cSMark Adams   *a_stride = nnodes;
400*6618991cSMark Adams   ierr      = MatCreateVecs(Gmat, &tmp_crds, 0);CHKERRQ(ierr);
401*6618991cSMark Adams 
402*6618991cSMark Adams   ierr = PetscMalloc1(data_sz*nnodes, &datas);CHKERRQ(ierr);
403*6618991cSMark Adams   for (dir=0; dir<data_sz; dir++) {
404*6618991cSMark Adams     /* set local, and global */
405*6618991cSMark Adams     for (kk=0; kk<nloc; kk++) {
406*6618991cSMark Adams       PetscInt    gid = my0 + kk;
407*6618991cSMark Adams       PetscScalar crd = (PetscScalar)data_in[dir*nloc + kk]; /* col oriented */
408*6618991cSMark Adams       datas[dir*nnodes + kk] = PetscRealPart(crd);
409*6618991cSMark Adams 
410*6618991cSMark Adams       ierr = VecSetValues(tmp_crds, 1, &gid, &crd, INSERT_VALUES);CHKERRQ(ierr);
411*6618991cSMark Adams     }
412*6618991cSMark Adams     ierr = VecAssemblyBegin(tmp_crds);CHKERRQ(ierr);
413*6618991cSMark Adams     ierr = VecAssemblyEnd(tmp_crds);CHKERRQ(ierr);
414*6618991cSMark Adams     /* get ghost datas */
415*6618991cSMark Adams     ierr = VecScatterBegin(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
416*6618991cSMark Adams     ierr = VecScatterEnd(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
417*6618991cSMark Adams     ierr = VecGetArray(mpimat->lvec, &data_arr);CHKERRQ(ierr);
418*6618991cSMark Adams     for (kk=nloc,jj=0;jj<num_ghosts;kk++,jj++) datas[dir*nnodes + kk] = PetscRealPart(data_arr[jj]);
419*6618991cSMark Adams     ierr = VecRestoreArray(mpimat->lvec, &data_arr);CHKERRQ(ierr);
420*6618991cSMark Adams   }
421*6618991cSMark Adams   ierr        = VecDestroy(&tmp_crds);CHKERRQ(ierr);
422*6618991cSMark Adams   *a_data_out = datas;
423*6618991cSMark Adams   PetscFunctionReturn(0);
424*6618991cSMark Adams }
425*6618991cSMark Adams 
426*6618991cSMark Adams 
427*6618991cSMark Adams /*
428*6618991cSMark Adams  *
429*6618991cSMark Adams  *  GAMGTableCreate
430*6618991cSMark Adams  */
431*6618991cSMark Adams 
432*6618991cSMark Adams #undef __FUNCT__
433*6618991cSMark Adams #define __FUNCT__ "GAMGTableCreate"
434*6618991cSMark Adams PetscErrorCode GAMGTableCreate(PetscInt a_size, GAMGHashTable *a_tab)
435*6618991cSMark Adams {
436*6618991cSMark Adams   PetscErrorCode ierr;
437*6618991cSMark Adams   PetscInt       kk;
438*6618991cSMark Adams 
439*6618991cSMark Adams   PetscFunctionBegin;
440*6618991cSMark Adams   a_tab->size = a_size;
441*6618991cSMark Adams   ierr = PetscMalloc1(a_size, &a_tab->table);CHKERRQ(ierr);
442*6618991cSMark Adams   ierr = PetscMalloc1(a_size, &a_tab->data);CHKERRQ(ierr);
443*6618991cSMark Adams   for (kk=0; kk<a_size; kk++) a_tab->table[kk] = -1;
444*6618991cSMark Adams   PetscFunctionReturn(0);
445*6618991cSMark Adams }
446*6618991cSMark Adams 
447*6618991cSMark Adams #undef __FUNCT__
448*6618991cSMark Adams #define __FUNCT__ "GAMGTableDestroy"
449*6618991cSMark Adams PetscErrorCode GAMGTableDestroy(GAMGHashTable *a_tab)
450*6618991cSMark Adams {
451*6618991cSMark Adams   PetscErrorCode ierr;
452*6618991cSMark Adams 
453*6618991cSMark Adams   PetscFunctionBegin;
454*6618991cSMark Adams   ierr = PetscFree(a_tab->table);CHKERRQ(ierr);
455*6618991cSMark Adams   ierr = PetscFree(a_tab->data);CHKERRQ(ierr);
456*6618991cSMark Adams   PetscFunctionReturn(0);
457*6618991cSMark Adams }
458*6618991cSMark Adams 
459*6618991cSMark Adams #undef __FUNCT__
460*6618991cSMark Adams #define __FUNCT__ "GAMGTableAdd"
461*6618991cSMark Adams PetscErrorCode GAMGTableAdd(GAMGHashTable *a_tab, PetscInt a_key, PetscInt a_data)
462*6618991cSMark Adams {
463*6618991cSMark Adams   PetscInt kk,idx;
464*6618991cSMark Adams 
465*6618991cSMark Adams   PetscFunctionBegin;
466*6618991cSMark Adams   if (a_key<0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Negative key %d.",a_key);
467*6618991cSMark Adams   for (kk = 0, idx = GAMG_HASH(a_key);
468*6618991cSMark Adams        kk < a_tab->size;
469*6618991cSMark Adams        kk++, idx = (idx==(a_tab->size-1)) ? 0 : idx + 1) {
470*6618991cSMark Adams 
471*6618991cSMark Adams     if (a_tab->table[idx] == a_key) {
472*6618991cSMark Adams       /* exists */
473*6618991cSMark Adams       a_tab->data[idx] = a_data;
474*6618991cSMark Adams       break;
475*6618991cSMark Adams     } else if (a_tab->table[idx] == -1) {
476*6618991cSMark Adams       /* add */
477*6618991cSMark Adams       a_tab->table[idx] = a_key;
478*6618991cSMark Adams       a_tab->data[idx]  = a_data;
479*6618991cSMark Adams       break;
480*6618991cSMark Adams     }
481*6618991cSMark Adams   }
482*6618991cSMark Adams   if (kk==a_tab->size) {
483*6618991cSMark Adams     /* this is not to efficient, waiting until completely full */
484*6618991cSMark Adams     PetscInt       oldsize = a_tab->size, new_size = 2*a_tab->size + 5, *oldtable = a_tab->table, *olddata = a_tab->data;
485*6618991cSMark Adams     PetscErrorCode ierr;
486*6618991cSMark Adams 
487*6618991cSMark Adams     a_tab->size = new_size;
488*6618991cSMark Adams 
489*6618991cSMark Adams     ierr = PetscMalloc1(a_tab->size, &a_tab->table);CHKERRQ(ierr);
490*6618991cSMark Adams     ierr = PetscMalloc1(a_tab->size, &a_tab->data);CHKERRQ(ierr);
491*6618991cSMark Adams 
492*6618991cSMark Adams     for (kk=0;kk<a_tab->size;kk++) a_tab->table[kk] = -1;
493*6618991cSMark Adams     for (kk=0;kk<oldsize;kk++) {
494*6618991cSMark Adams       if (oldtable[kk] != -1) {
495*6618991cSMark Adams         ierr = GAMGTableAdd(a_tab, oldtable[kk], olddata[kk]);CHKERRQ(ierr);
496*6618991cSMark Adams        }
497*6618991cSMark Adams     }
498*6618991cSMark Adams     ierr = PetscFree(oldtable);CHKERRQ(ierr);
499*6618991cSMark Adams     ierr = PetscFree(olddata);CHKERRQ(ierr);
500*6618991cSMark Adams     ierr = GAMGTableAdd(a_tab, a_key, a_data);CHKERRQ(ierr);
501*6618991cSMark Adams   }
502*6618991cSMark Adams   PetscFunctionReturn(0);
503*6618991cSMark Adams }
504*6618991cSMark Adams 
505