1*6618991cSMark Adams /* 2*6618991cSMark Adams GAMG geometric-algebric multigrid PC - Mark Adams 2011 3*6618991cSMark Adams */ 4*6618991cSMark Adams #include <petsc/private/matimpl.h> 5*6618991cSMark Adams #include <../src/ksp/pc/impls/gamg/gamg.h> /*I "petscpc.h" I*/ 6*6618991cSMark Adams #include <petsc/private/kspimpl.h> 7*6618991cSMark Adams 8*6618991cSMark Adams #undef __FUNCT__ 9*6618991cSMark Adams #define __FUNCT__ "MatCollapseRow" 10*6618991cSMark Adams /* 11*6618991cSMark Adams Produces a set of block column indices of the matrix row, one for each block represented in the original row 12*6618991cSMark Adams 13*6618991cSMark Adams n - the number of block indices in cc[] 14*6618991cSMark Adams cc - the block indices (must be large enough to contain the indices) 15*6618991cSMark Adams */ 16*6618991cSMark Adams PETSC_STATIC_INLINE PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc) 17*6618991cSMark Adams { 18*6618991cSMark Adams PetscInt cnt = -1,nidx,j; 19*6618991cSMark Adams const PetscInt *idx; 20*6618991cSMark Adams PetscErrorCode ierr; 21*6618991cSMark Adams 22*6618991cSMark Adams PetscFunctionBegin; 23*6618991cSMark Adams ierr = MatGetRow(Amat,row,&nidx,&idx,NULL);CHKERRQ(ierr); 24*6618991cSMark Adams if (nidx) { 25*6618991cSMark Adams cnt = 0; 26*6618991cSMark Adams cc[cnt] = idx[0]/bs; 27*6618991cSMark Adams for (j=1; j<nidx; j++) { 28*6618991cSMark Adams if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs; 29*6618991cSMark Adams } 30*6618991cSMark Adams } 31*6618991cSMark Adams ierr = MatRestoreRow(Amat,row,&nidx,&idx,NULL);CHKERRQ(ierr); 32*6618991cSMark Adams *n = cnt+1; 33*6618991cSMark Adams PetscFunctionReturn(0); 34*6618991cSMark Adams } 35*6618991cSMark Adams 36*6618991cSMark Adams #undef __FUNCT__ 37*6618991cSMark Adams #define __FUNCT__ "MatCollapseRows" 38*6618991cSMark Adams /* 39*6618991cSMark Adams Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 40*6618991cSMark Adams 41*6618991cSMark Adams ncollapsed - the number of block indices 42*6618991cSMark Adams collapsed - the block indices (must be large enough to contain the indices) 43*6618991cSMark Adams */ 44*6618991cSMark Adams PETSC_STATIC_INLINE PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed) 45*6618991cSMark Adams { 46*6618991cSMark Adams PetscInt i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp; 47*6618991cSMark Adams PetscErrorCode ierr; 48*6618991cSMark Adams 49*6618991cSMark Adams PetscFunctionBegin; 50*6618991cSMark Adams ierr = MatCollapseRow(Amat,start,bs,&nprev,cprev);CHKERRQ(ierr); 51*6618991cSMark Adams for (i=start+1; i<start+bs; i++) { 52*6618991cSMark Adams ierr = MatCollapseRow(Amat,i,bs,&ncur,ccur);CHKERRQ(ierr); 53*6618991cSMark Adams ierr = PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged);CHKERRQ(ierr); 54*6618991cSMark Adams cprevtmp = cprev; cprev = merged; merged = cprevtmp; 55*6618991cSMark Adams } 56*6618991cSMark Adams *ncollapsed = nprev; 57*6618991cSMark Adams if (collapsed) *collapsed = cprev; 58*6618991cSMark Adams PetscFunctionReturn(0); 59*6618991cSMark Adams } 60*6618991cSMark Adams 61*6618991cSMark Adams 62*6618991cSMark Adams /* -------------------------------------------------------------------------- */ 63*6618991cSMark Adams /* 64*6618991cSMark Adams PCGAMGCreateGraph - create simple scaled scalar graph from matrix 65*6618991cSMark Adams 66*6618991cSMark Adams Input Parameter: 67*6618991cSMark Adams . Amat - matrix 68*6618991cSMark Adams Output Parameter: 69*6618991cSMark Adams . a_Gmaat - eoutput scalar graph (symmetric?) 70*6618991cSMark Adams */ 71*6618991cSMark Adams #undef __FUNCT__ 72*6618991cSMark Adams #define __FUNCT__ "PCGAMGCreateGraph" 73*6618991cSMark Adams PetscErrorCode PCGAMGCreateGraph(Mat Amat, Mat *a_Gmat) 74*6618991cSMark Adams { 75*6618991cSMark Adams PetscErrorCode ierr; 76*6618991cSMark Adams PetscInt Istart,Iend,Ii,i,jj,kk,ncols,nloc,NN,MM,bs; 77*6618991cSMark Adams MPI_Comm comm; 78*6618991cSMark Adams Mat Gmat; 79*6618991cSMark Adams MatType mtype; 80*6618991cSMark Adams 81*6618991cSMark Adams PetscFunctionBegin; 82*6618991cSMark Adams ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 83*6618991cSMark Adams ierr = MatGetOwnershipRange(Amat, &Istart, &Iend);CHKERRQ(ierr); 84*6618991cSMark Adams ierr = MatGetSize(Amat, &MM, &NN);CHKERRQ(ierr); 85*6618991cSMark Adams ierr = MatGetBlockSize(Amat, &bs);CHKERRQ(ierr); 86*6618991cSMark Adams nloc = (Iend-Istart)/bs; 87*6618991cSMark Adams 88*6618991cSMark Adams #if defined PETSC_GAMG_USE_LOG 89*6618991cSMark Adams ierr = PetscLogEventBegin(petsc_gamg_setup_events[GRAPH],0,0,0,0);CHKERRQ(ierr); 90*6618991cSMark Adams #endif 91*6618991cSMark Adams 92*6618991cSMark Adams if (bs > 1) { 93*6618991cSMark Adams const PetscScalar *vals; 94*6618991cSMark Adams const PetscInt *idx; 95*6618991cSMark Adams PetscInt *d_nnz, *o_nnz,*blockmask = NULL,maskcnt,*w0,*w1,*w2; 96*6618991cSMark Adams PetscBool ismpiaij,isseqaij; 97*6618991cSMark Adams 98*6618991cSMark Adams /* 99*6618991cSMark Adams Determine the preallocation needed for the scalar matrix derived from the vector matrix. 100*6618991cSMark Adams */ 101*6618991cSMark Adams 102*6618991cSMark Adams ierr = PetscObjectTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 103*6618991cSMark Adams ierr = PetscObjectTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr); 104*6618991cSMark Adams 105*6618991cSMark Adams ierr = PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz);CHKERRQ(ierr); 106*6618991cSMark Adams 107*6618991cSMark Adams if (isseqaij) { 108*6618991cSMark Adams PetscInt max_d_nnz; 109*6618991cSMark Adams 110*6618991cSMark Adams /* 111*6618991cSMark Adams Determine exact preallocation count for (sequential) scalar matrix 112*6618991cSMark Adams */ 113*6618991cSMark Adams ierr = MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz);CHKERRQ(ierr); 114*6618991cSMark Adams max_d_nnz = PetscMin(nloc,bs*max_d_nnz);CHKERRQ(ierr); 115*6618991cSMark Adams ierr = PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2);CHKERRQ(ierr); 116*6618991cSMark Adams for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) { 117*6618991cSMark Adams ierr = MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL);CHKERRQ(ierr); 118*6618991cSMark Adams } 119*6618991cSMark Adams ierr = PetscFree3(w0,w1,w2);CHKERRQ(ierr); 120*6618991cSMark Adams 121*6618991cSMark Adams } else if (ismpiaij) { 122*6618991cSMark Adams Mat Daij,Oaij; 123*6618991cSMark Adams const PetscInt *garray; 124*6618991cSMark Adams PetscInt max_d_nnz; 125*6618991cSMark Adams 126*6618991cSMark Adams ierr = MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray);CHKERRQ(ierr); 127*6618991cSMark Adams 128*6618991cSMark Adams /* 129*6618991cSMark Adams Determine exact preallocation count for diagonal block portion of scalar matrix 130*6618991cSMark Adams */ 131*6618991cSMark Adams ierr = MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz);CHKERRQ(ierr); 132*6618991cSMark Adams max_d_nnz = PetscMin(nloc,bs*max_d_nnz);CHKERRQ(ierr); 133*6618991cSMark Adams ierr = PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2);CHKERRQ(ierr); 134*6618991cSMark Adams for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 135*6618991cSMark Adams ierr = MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL);CHKERRQ(ierr); 136*6618991cSMark Adams } 137*6618991cSMark Adams ierr = PetscFree3(w0,w1,w2);CHKERRQ(ierr); 138*6618991cSMark Adams 139*6618991cSMark Adams /* 140*6618991cSMark Adams Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 141*6618991cSMark Adams */ 142*6618991cSMark Adams for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 143*6618991cSMark Adams o_nnz[jj] = 0; 144*6618991cSMark Adams for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */ 145*6618991cSMark Adams ierr = MatGetRow(Oaij,Ii+kk,&ncols,0,0);CHKERRQ(ierr); 146*6618991cSMark Adams o_nnz[jj] += ncols; 147*6618991cSMark Adams ierr = MatRestoreRow(Oaij,Ii+kk,&ncols,0,0);CHKERRQ(ierr); 148*6618991cSMark Adams } 149*6618991cSMark Adams if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc; 150*6618991cSMark Adams } 151*6618991cSMark Adams 152*6618991cSMark Adams } else { 153*6618991cSMark Adams /* 154*6618991cSMark Adams 155*6618991cSMark Adams This is O(nloc*nloc/bs) work! 156*6618991cSMark Adams 157*6618991cSMark Adams This is accurate for the "diagonal" block of the matrix but will be grossly high for the 158*6618991cSMark Adams off diagonal block most of the time but could be too low for the off-diagonal. 159*6618991cSMark Adams 160*6618991cSMark Adams This should be fixed to be accurate for the off-diagonal portion. Cannot just use a mask 161*6618991cSMark Adams for the off-diagonal portion since for huge matrices that would require too much memory per 162*6618991cSMark Adams MPI process. 163*6618991cSMark Adams */ 164*6618991cSMark Adams ierr = PetscMalloc1(nloc, &blockmask);CHKERRQ(ierr); 165*6618991cSMark Adams for (Ii = Istart, jj = 0; Ii < Iend; Ii += bs, jj++) { 166*6618991cSMark Adams o_nnz[jj] = 0; 167*6618991cSMark Adams ierr = PetscMemzero(blockmask,nloc*sizeof(PetscInt));CHKERRQ(ierr); 168*6618991cSMark Adams for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */ 169*6618991cSMark Adams ierr = MatGetRow(Amat,Ii+kk,&ncols,&idx,0);CHKERRQ(ierr); 170*6618991cSMark Adams for (i=0; i<ncols; i++) { 171*6618991cSMark Adams if (idx[i] >= Istart && idx[i] < Iend) { 172*6618991cSMark Adams blockmask[(idx[i] - Istart)/bs] = 1; 173*6618991cSMark Adams } 174*6618991cSMark Adams } 175*6618991cSMark Adams if (ncols > o_nnz[jj]) { 176*6618991cSMark Adams o_nnz[jj] = ncols; 177*6618991cSMark Adams if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc; 178*6618991cSMark Adams } 179*6618991cSMark Adams ierr = MatRestoreRow(Amat,Ii+kk,&ncols,&idx,0);CHKERRQ(ierr); 180*6618991cSMark Adams } 181*6618991cSMark Adams maskcnt = 0; 182*6618991cSMark Adams for (i=0; i<nloc; i++) { 183*6618991cSMark Adams if (blockmask[i]) maskcnt++; 184*6618991cSMark Adams } 185*6618991cSMark Adams d_nnz[jj] = maskcnt; 186*6618991cSMark Adams } 187*6618991cSMark Adams ierr = PetscFree(blockmask);CHKERRQ(ierr); 188*6618991cSMark Adams } 189*6618991cSMark Adams 190*6618991cSMark Adams /* get scalar copy (norms) of matrix -- AIJ specific!!! */ 191*6618991cSMark Adams ierr = MatGetType(Amat,&mtype);CHKERRQ(ierr); 192*6618991cSMark Adams ierr = MatCreate(comm, &Gmat);CHKERRQ(ierr); 193*6618991cSMark Adams ierr = MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 194*6618991cSMark Adams ierr = MatSetBlockSizes(Gmat, 1, 1);CHKERRQ(ierr); 195*6618991cSMark Adams ierr = MatSetType(Gmat, mtype);CHKERRQ(ierr); 196*6618991cSMark Adams ierr = MatSeqAIJSetPreallocation(Gmat,0,d_nnz);CHKERRQ(ierr); 197*6618991cSMark Adams ierr = MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 198*6618991cSMark Adams ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 199*6618991cSMark Adams 200*6618991cSMark Adams for (Ii = Istart; Ii < Iend; Ii++) { 201*6618991cSMark Adams PetscInt dest_row = Ii/bs; 202*6618991cSMark Adams ierr = MatGetRow(Amat,Ii,&ncols,&idx,&vals);CHKERRQ(ierr); 203*6618991cSMark Adams for (jj=0; jj<ncols; jj++) { 204*6618991cSMark Adams PetscInt dest_col = idx[jj]/bs; 205*6618991cSMark Adams PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 206*6618991cSMark Adams ierr = MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES);CHKERRQ(ierr); 207*6618991cSMark Adams } 208*6618991cSMark Adams ierr = MatRestoreRow(Amat,Ii,&ncols,&idx,&vals);CHKERRQ(ierr); 209*6618991cSMark Adams } 210*6618991cSMark Adams ierr = MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 211*6618991cSMark Adams ierr = MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 212*6618991cSMark Adams } else { 213*6618991cSMark Adams /* just copy scalar matrix - abs() not taken here but scaled later */ 214*6618991cSMark Adams ierr = MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat);CHKERRQ(ierr); 215*6618991cSMark Adams } 216*6618991cSMark Adams 217*6618991cSMark Adams #if defined PETSC_GAMG_USE_LOG 218*6618991cSMark Adams ierr = PetscLogEventEnd(petsc_gamg_setup_events[GRAPH],0,0,0,0);CHKERRQ(ierr); 219*6618991cSMark Adams #endif 220*6618991cSMark Adams 221*6618991cSMark Adams *a_Gmat = Gmat; 222*6618991cSMark Adams PetscFunctionReturn(0); 223*6618991cSMark Adams } 224*6618991cSMark Adams 225*6618991cSMark Adams /* -------------------------------------------------------------------------- */ 226*6618991cSMark Adams /* 227*6618991cSMark Adams PCGAMGFilterGraph - filter (remove zero and possibly small values from the) graph and symetrize if needed 228*6618991cSMark Adams 229*6618991cSMark Adams Input Parameter: 230*6618991cSMark Adams . vfilter - threshold paramter [0,1) 231*6618991cSMark Adams . symm - symetrize? 232*6618991cSMark Adams In/Output Parameter: 233*6618991cSMark Adams . a_Gmat - original graph 234*6618991cSMark Adams */ 235*6618991cSMark Adams #undef __FUNCT__ 236*6618991cSMark Adams #define __FUNCT__ "PCGAMGFilterGraph" 237*6618991cSMark Adams PetscErrorCode PCGAMGFilterGraph(Mat *a_Gmat,PetscReal vfilter,PetscBool symm) 238*6618991cSMark Adams { 239*6618991cSMark Adams PetscErrorCode ierr; 240*6618991cSMark Adams PetscInt Istart,Iend,Ii,jj,ncols,nnz0,nnz1, NN, MM, nloc; 241*6618991cSMark Adams PetscMPIInt rank; 242*6618991cSMark Adams Mat Gmat = *a_Gmat, tGmat, matTrans; 243*6618991cSMark Adams MPI_Comm comm; 244*6618991cSMark Adams const PetscScalar *vals; 245*6618991cSMark Adams const PetscInt *idx; 246*6618991cSMark Adams PetscInt *d_nnz, *o_nnz; 247*6618991cSMark Adams Vec diag; 248*6618991cSMark Adams MatType mtype; 249*6618991cSMark Adams 250*6618991cSMark Adams PetscFunctionBegin; 251*6618991cSMark Adams #if defined PETSC_GAMG_USE_LOG 252*6618991cSMark Adams ierr = PetscLogEventBegin(petsc_gamg_setup_events[GRAPH],0,0,0,0);CHKERRQ(ierr); 253*6618991cSMark Adams #endif 254*6618991cSMark Adams /* scale Gmat for all values between -1 and 1 */ 255*6618991cSMark Adams ierr = MatCreateVecs(Gmat, &diag, 0);CHKERRQ(ierr); 256*6618991cSMark Adams ierr = MatGetDiagonal(Gmat, diag);CHKERRQ(ierr); 257*6618991cSMark Adams ierr = VecReciprocal(diag);CHKERRQ(ierr); 258*6618991cSMark Adams ierr = VecSqrtAbs(diag);CHKERRQ(ierr); 259*6618991cSMark Adams ierr = MatDiagonalScale(Gmat, diag, diag);CHKERRQ(ierr); 260*6618991cSMark Adams ierr = VecDestroy(&diag);CHKERRQ(ierr); 261*6618991cSMark Adams 262*6618991cSMark Adams if (vfilter < 0.0 && !symm) { 263*6618991cSMark Adams /* Just use the provided matrix as the graph but make all values positive */ 264*6618991cSMark Adams Mat_MPIAIJ *aij = (Mat_MPIAIJ*)Gmat->data; 265*6618991cSMark Adams MatInfo info; 266*6618991cSMark Adams PetscScalar *avals; 267*6618991cSMark Adams PetscMPIInt size; 268*6618991cSMark Adams 269*6618991cSMark Adams ierr = MPI_Comm_size(PetscObjectComm((PetscObject)Gmat),&size);CHKERRQ(ierr); 270*6618991cSMark Adams if (size == 1) { 271*6618991cSMark Adams ierr = MatGetInfo(Gmat,MAT_LOCAL,&info);CHKERRQ(ierr); 272*6618991cSMark Adams ierr = MatSeqAIJGetArray(Gmat,&avals);CHKERRQ(ierr); 273*6618991cSMark Adams for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 274*6618991cSMark Adams ierr = MatSeqAIJRestoreArray(Gmat,&avals);CHKERRQ(ierr); 275*6618991cSMark Adams } else { 276*6618991cSMark Adams ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 277*6618991cSMark Adams ierr = MatSeqAIJGetArray(aij->A,&avals);CHKERRQ(ierr); 278*6618991cSMark Adams for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 279*6618991cSMark Adams ierr = MatSeqAIJRestoreArray(aij->A,&avals);CHKERRQ(ierr); 280*6618991cSMark Adams ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 281*6618991cSMark Adams ierr = MatSeqAIJGetArray(aij->B,&avals);CHKERRQ(ierr); 282*6618991cSMark Adams for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 283*6618991cSMark Adams ierr = MatSeqAIJRestoreArray(aij->B,&avals);CHKERRQ(ierr); 284*6618991cSMark Adams } 285*6618991cSMark Adams #if defined PETSC_GAMG_USE_LOG 286*6618991cSMark Adams ierr = PetscLogEventEnd(petsc_gamg_setup_events[GRAPH],0,0,0,0);CHKERRQ(ierr); 287*6618991cSMark Adams #endif 288*6618991cSMark Adams PetscFunctionReturn(0); 289*6618991cSMark Adams } 290*6618991cSMark Adams 291*6618991cSMark Adams ierr = PetscObjectGetComm((PetscObject)Gmat,&comm);CHKERRQ(ierr); 292*6618991cSMark Adams ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 293*6618991cSMark Adams ierr = MatGetOwnershipRange(Gmat, &Istart, &Iend);CHKERRQ(ierr); 294*6618991cSMark Adams nloc = Iend - Istart; 295*6618991cSMark Adams ierr = MatGetSize(Gmat, &MM, &NN);CHKERRQ(ierr); 296*6618991cSMark Adams 297*6618991cSMark Adams if (symm) { 298*6618991cSMark Adams ierr = MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans);CHKERRQ(ierr); 299*6618991cSMark Adams } 300*6618991cSMark Adams 301*6618991cSMark Adams /* Determine upper bound on nonzeros needed in new filtered matrix */ 302*6618991cSMark Adams ierr = PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz);CHKERRQ(ierr); 303*6618991cSMark Adams for (Ii = Istart, jj = 0; Ii < Iend; Ii++, jj++) { 304*6618991cSMark Adams ierr = MatGetRow(Gmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr); 305*6618991cSMark Adams d_nnz[jj] = ncols; 306*6618991cSMark Adams o_nnz[jj] = ncols; 307*6618991cSMark Adams ierr = MatRestoreRow(Gmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr); 308*6618991cSMark Adams if (symm) { 309*6618991cSMark Adams ierr = MatGetRow(matTrans,Ii,&ncols,NULL,NULL);CHKERRQ(ierr); 310*6618991cSMark Adams d_nnz[jj] += ncols; 311*6618991cSMark Adams o_nnz[jj] += ncols; 312*6618991cSMark Adams ierr = MatRestoreRow(matTrans,Ii,&ncols,NULL,NULL);CHKERRQ(ierr); 313*6618991cSMark Adams } 314*6618991cSMark Adams if (d_nnz[jj] > nloc) d_nnz[jj] = nloc; 315*6618991cSMark Adams if (o_nnz[jj] > (MM-nloc)) o_nnz[jj] = MM - nloc; 316*6618991cSMark Adams } 317*6618991cSMark Adams ierr = MatGetType(Gmat,&mtype);CHKERRQ(ierr); 318*6618991cSMark Adams ierr = MatCreate(comm, &tGmat);CHKERRQ(ierr); 319*6618991cSMark Adams ierr = MatSetSizes(tGmat,nloc,nloc,MM,MM);CHKERRQ(ierr); 320*6618991cSMark Adams ierr = MatSetBlockSizes(tGmat, 1, 1);CHKERRQ(ierr); 321*6618991cSMark Adams ierr = MatSetType(tGmat, mtype);CHKERRQ(ierr); 322*6618991cSMark Adams ierr = MatSeqAIJSetPreallocation(tGmat,0,d_nnz);CHKERRQ(ierr); 323*6618991cSMark Adams ierr = MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 324*6618991cSMark Adams ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 325*6618991cSMark Adams if (symm) { 326*6618991cSMark Adams ierr = MatDestroy(&matTrans);CHKERRQ(ierr); 327*6618991cSMark Adams } else { 328*6618991cSMark Adams /* all entries are generated locally so MatAssembly will be slightly faster for large process counts */ 329*6618991cSMark Adams ierr = MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 330*6618991cSMark Adams } 331*6618991cSMark Adams 332*6618991cSMark Adams for (Ii = Istart, nnz0 = nnz1 = 0; Ii < Iend; Ii++) { 333*6618991cSMark Adams ierr = MatGetRow(Gmat,Ii,&ncols,&idx,&vals);CHKERRQ(ierr); 334*6618991cSMark Adams for (jj=0; jj<ncols; jj++,nnz0++) { 335*6618991cSMark Adams PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 336*6618991cSMark Adams if (PetscRealPart(sv) > vfilter) { 337*6618991cSMark Adams nnz1++; 338*6618991cSMark Adams if (symm) { 339*6618991cSMark Adams sv *= 0.5; 340*6618991cSMark Adams ierr = MatSetValues(tGmat,1,&Ii,1,&idx[jj],&sv,ADD_VALUES);CHKERRQ(ierr); 341*6618991cSMark Adams ierr = MatSetValues(tGmat,1,&idx[jj],1,&Ii,&sv,ADD_VALUES);CHKERRQ(ierr); 342*6618991cSMark Adams } else { 343*6618991cSMark Adams ierr = MatSetValues(tGmat,1,&Ii,1,&idx[jj],&sv,ADD_VALUES);CHKERRQ(ierr); 344*6618991cSMark Adams } 345*6618991cSMark Adams } 346*6618991cSMark Adams } 347*6618991cSMark Adams ierr = MatRestoreRow(Gmat,Ii,&ncols,&idx,&vals);CHKERRQ(ierr); 348*6618991cSMark Adams } 349*6618991cSMark Adams ierr = MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 350*6618991cSMark Adams ierr = MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 351*6618991cSMark Adams 352*6618991cSMark Adams #if defined PETSC_GAMG_USE_LOG 353*6618991cSMark Adams ierr = PetscLogEventEnd(petsc_gamg_setup_events[GRAPH],0,0,0,0);CHKERRQ(ierr); 354*6618991cSMark Adams #endif 355*6618991cSMark Adams 356*6618991cSMark Adams #if defined(PETSC_USE_INFO) 357*6618991cSMark Adams { 358*6618991cSMark Adams double t1 = (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, t2 = (!nloc) ? 1. : (double)nnz0/(double)nloc; 359*6618991cSMark Adams ierr = PetscInfo4(*a_Gmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%D)\n",t1,vfilter,t2,MM);CHKERRQ(ierr); 360*6618991cSMark Adams } 361*6618991cSMark Adams #endif 362*6618991cSMark Adams ierr = MatDestroy(&Gmat);CHKERRQ(ierr); 363*6618991cSMark Adams *a_Gmat = tGmat; 364*6618991cSMark Adams PetscFunctionReturn(0); 365*6618991cSMark Adams } 366*6618991cSMark Adams 367*6618991cSMark Adams /* -------------------------------------------------------------------------- */ 368*6618991cSMark Adams /* 369*6618991cSMark Adams PCGAMGGetDataWithGhosts - hacks into Mat MPIAIJ so this must have > 1 pe 370*6618991cSMark Adams 371*6618991cSMark Adams Input Parameter: 372*6618991cSMark Adams . Gmat - MPIAIJ matrix for scattters 373*6618991cSMark Adams . data_sz - number of data terms per node (# cols in output) 374*6618991cSMark Adams . data_in[nloc*data_sz] - column oriented data 375*6618991cSMark Adams Output Parameter: 376*6618991cSMark Adams . a_stride - numbrt of rows of output 377*6618991cSMark Adams . a_data_out[stride*data_sz] - output data with ghosts 378*6618991cSMark Adams */ 379*6618991cSMark Adams #undef __FUNCT__ 380*6618991cSMark Adams #define __FUNCT__ "PCGAMGGetDataWithGhosts" 381*6618991cSMark Adams PetscErrorCode PCGAMGGetDataWithGhosts(Mat Gmat,PetscInt data_sz,PetscReal data_in[],PetscInt *a_stride,PetscReal **a_data_out) 382*6618991cSMark Adams { 383*6618991cSMark Adams PetscErrorCode ierr; 384*6618991cSMark Adams MPI_Comm comm; 385*6618991cSMark Adams Vec tmp_crds; 386*6618991cSMark Adams Mat_MPIAIJ *mpimat = (Mat_MPIAIJ*)Gmat->data; 387*6618991cSMark Adams PetscInt nnodes,num_ghosts,dir,kk,jj,my0,Iend,nloc; 388*6618991cSMark Adams PetscScalar *data_arr; 389*6618991cSMark Adams PetscReal *datas; 390*6618991cSMark Adams PetscBool isMPIAIJ; 391*6618991cSMark Adams 392*6618991cSMark Adams PetscFunctionBegin; 393*6618991cSMark Adams ierr = PetscObjectGetComm((PetscObject)Gmat,&comm);CHKERRQ(ierr); 394*6618991cSMark Adams ierr = PetscObjectTypeCompare((PetscObject)Gmat, MATMPIAIJ, &isMPIAIJ);CHKERRQ(ierr); 395*6618991cSMark Adams ierr = MatGetOwnershipRange(Gmat, &my0, &Iend);CHKERRQ(ierr); 396*6618991cSMark Adams nloc = Iend - my0; 397*6618991cSMark Adams ierr = VecGetLocalSize(mpimat->lvec, &num_ghosts);CHKERRQ(ierr); 398*6618991cSMark Adams nnodes = num_ghosts + nloc; 399*6618991cSMark Adams *a_stride = nnodes; 400*6618991cSMark Adams ierr = MatCreateVecs(Gmat, &tmp_crds, 0);CHKERRQ(ierr); 401*6618991cSMark Adams 402*6618991cSMark Adams ierr = PetscMalloc1(data_sz*nnodes, &datas);CHKERRQ(ierr); 403*6618991cSMark Adams for (dir=0; dir<data_sz; dir++) { 404*6618991cSMark Adams /* set local, and global */ 405*6618991cSMark Adams for (kk=0; kk<nloc; kk++) { 406*6618991cSMark Adams PetscInt gid = my0 + kk; 407*6618991cSMark Adams PetscScalar crd = (PetscScalar)data_in[dir*nloc + kk]; /* col oriented */ 408*6618991cSMark Adams datas[dir*nnodes + kk] = PetscRealPart(crd); 409*6618991cSMark Adams 410*6618991cSMark Adams ierr = VecSetValues(tmp_crds, 1, &gid, &crd, INSERT_VALUES);CHKERRQ(ierr); 411*6618991cSMark Adams } 412*6618991cSMark Adams ierr = VecAssemblyBegin(tmp_crds);CHKERRQ(ierr); 413*6618991cSMark Adams ierr = VecAssemblyEnd(tmp_crds);CHKERRQ(ierr); 414*6618991cSMark Adams /* get ghost datas */ 415*6618991cSMark Adams ierr = VecScatterBegin(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 416*6618991cSMark Adams ierr = VecScatterEnd(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 417*6618991cSMark Adams ierr = VecGetArray(mpimat->lvec, &data_arr);CHKERRQ(ierr); 418*6618991cSMark Adams for (kk=nloc,jj=0;jj<num_ghosts;kk++,jj++) datas[dir*nnodes + kk] = PetscRealPart(data_arr[jj]); 419*6618991cSMark Adams ierr = VecRestoreArray(mpimat->lvec, &data_arr);CHKERRQ(ierr); 420*6618991cSMark Adams } 421*6618991cSMark Adams ierr = VecDestroy(&tmp_crds);CHKERRQ(ierr); 422*6618991cSMark Adams *a_data_out = datas; 423*6618991cSMark Adams PetscFunctionReturn(0); 424*6618991cSMark Adams } 425*6618991cSMark Adams 426*6618991cSMark Adams 427*6618991cSMark Adams /* 428*6618991cSMark Adams * 429*6618991cSMark Adams * GAMGTableCreate 430*6618991cSMark Adams */ 431*6618991cSMark Adams 432*6618991cSMark Adams #undef __FUNCT__ 433*6618991cSMark Adams #define __FUNCT__ "GAMGTableCreate" 434*6618991cSMark Adams PetscErrorCode GAMGTableCreate(PetscInt a_size, GAMGHashTable *a_tab) 435*6618991cSMark Adams { 436*6618991cSMark Adams PetscErrorCode ierr; 437*6618991cSMark Adams PetscInt kk; 438*6618991cSMark Adams 439*6618991cSMark Adams PetscFunctionBegin; 440*6618991cSMark Adams a_tab->size = a_size; 441*6618991cSMark Adams ierr = PetscMalloc1(a_size, &a_tab->table);CHKERRQ(ierr); 442*6618991cSMark Adams ierr = PetscMalloc1(a_size, &a_tab->data);CHKERRQ(ierr); 443*6618991cSMark Adams for (kk=0; kk<a_size; kk++) a_tab->table[kk] = -1; 444*6618991cSMark Adams PetscFunctionReturn(0); 445*6618991cSMark Adams } 446*6618991cSMark Adams 447*6618991cSMark Adams #undef __FUNCT__ 448*6618991cSMark Adams #define __FUNCT__ "GAMGTableDestroy" 449*6618991cSMark Adams PetscErrorCode GAMGTableDestroy(GAMGHashTable *a_tab) 450*6618991cSMark Adams { 451*6618991cSMark Adams PetscErrorCode ierr; 452*6618991cSMark Adams 453*6618991cSMark Adams PetscFunctionBegin; 454*6618991cSMark Adams ierr = PetscFree(a_tab->table);CHKERRQ(ierr); 455*6618991cSMark Adams ierr = PetscFree(a_tab->data);CHKERRQ(ierr); 456*6618991cSMark Adams PetscFunctionReturn(0); 457*6618991cSMark Adams } 458*6618991cSMark Adams 459*6618991cSMark Adams #undef __FUNCT__ 460*6618991cSMark Adams #define __FUNCT__ "GAMGTableAdd" 461*6618991cSMark Adams PetscErrorCode GAMGTableAdd(GAMGHashTable *a_tab, PetscInt a_key, PetscInt a_data) 462*6618991cSMark Adams { 463*6618991cSMark Adams PetscInt kk,idx; 464*6618991cSMark Adams 465*6618991cSMark Adams PetscFunctionBegin; 466*6618991cSMark Adams if (a_key<0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Negative key %d.",a_key); 467*6618991cSMark Adams for (kk = 0, idx = GAMG_HASH(a_key); 468*6618991cSMark Adams kk < a_tab->size; 469*6618991cSMark Adams kk++, idx = (idx==(a_tab->size-1)) ? 0 : idx + 1) { 470*6618991cSMark Adams 471*6618991cSMark Adams if (a_tab->table[idx] == a_key) { 472*6618991cSMark Adams /* exists */ 473*6618991cSMark Adams a_tab->data[idx] = a_data; 474*6618991cSMark Adams break; 475*6618991cSMark Adams } else if (a_tab->table[idx] == -1) { 476*6618991cSMark Adams /* add */ 477*6618991cSMark Adams a_tab->table[idx] = a_key; 478*6618991cSMark Adams a_tab->data[idx] = a_data; 479*6618991cSMark Adams break; 480*6618991cSMark Adams } 481*6618991cSMark Adams } 482*6618991cSMark Adams if (kk==a_tab->size) { 483*6618991cSMark Adams /* this is not to efficient, waiting until completely full */ 484*6618991cSMark Adams PetscInt oldsize = a_tab->size, new_size = 2*a_tab->size + 5, *oldtable = a_tab->table, *olddata = a_tab->data; 485*6618991cSMark Adams PetscErrorCode ierr; 486*6618991cSMark Adams 487*6618991cSMark Adams a_tab->size = new_size; 488*6618991cSMark Adams 489*6618991cSMark Adams ierr = PetscMalloc1(a_tab->size, &a_tab->table);CHKERRQ(ierr); 490*6618991cSMark Adams ierr = PetscMalloc1(a_tab->size, &a_tab->data);CHKERRQ(ierr); 491*6618991cSMark Adams 492*6618991cSMark Adams for (kk=0;kk<a_tab->size;kk++) a_tab->table[kk] = -1; 493*6618991cSMark Adams for (kk=0;kk<oldsize;kk++) { 494*6618991cSMark Adams if (oldtable[kk] != -1) { 495*6618991cSMark Adams ierr = GAMGTableAdd(a_tab, oldtable[kk], olddata[kk]);CHKERRQ(ierr); 496*6618991cSMark Adams } 497*6618991cSMark Adams } 498*6618991cSMark Adams ierr = PetscFree(oldtable);CHKERRQ(ierr); 499*6618991cSMark Adams ierr = PetscFree(olddata);CHKERRQ(ierr); 500*6618991cSMark Adams ierr = GAMGTableAdd(a_tab, a_key, a_data);CHKERRQ(ierr); 501*6618991cSMark Adams } 502*6618991cSMark Adams PetscFunctionReturn(0); 503*6618991cSMark Adams } 504*6618991cSMark Adams 505