1dba47a55SKris Buschelman #define PETSCKSP_DLL 2dba47a55SKris Buschelman 34b9ad928SBarry Smith /* 4*3f457be1SHong Zhang This file defines a "solve the problem redundantly on each subgroup of processor" preconditioner. 54b9ad928SBarry Smith */ 66356e834SBarry Smith #include "private/pcimpl.h" /*I "petscpc.h" I*/ 74b9ad928SBarry Smith #include "petscksp.h" 84b9ad928SBarry Smith 9*3f457be1SHong Zhang #undef CONTIGUOUS_COLOR 10*3f457be1SHong Zhang #define INTER_COLOR 11*3f457be1SHong Zhang 124b9ad928SBarry Smith typedef struct { 134b9ad928SBarry Smith PC pc; /* actual preconditioner used on each processor */ 14*3f457be1SHong Zhang Vec xsub,ysub; /* vectors of a subcommunicator to hold parallel vectors of pc->comm */ 15*3f457be1SHong Zhang Vec xdup,ydup; /* parallel vector that congregates xsub or ysub facilitating vector scattering */ 164b9ad928SBarry Smith Mat *pmats; /* matrix and optional preconditioner matrix */ 17*3f457be1SHong Zhang Mat pmats_sub; /* matrix and optional preconditioner matrix */ 18*3f457be1SHong Zhang VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */ 194b9ad928SBarry Smith PetscTruth useparallelmat; 20*3f457be1SHong Zhang MPI_Comm subcomm; /* processors belong to a subcommunicator implement a PC in parallel */ 21*3f457be1SHong Zhang MPI_Comm dupcomm; /* processors belong to pc->comm with their rank remapped in the way 22*3f457be1SHong Zhang that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */ 23*3f457be1SHong Zhang PetscInt nsubcomm; /* num of subcommunicators, which equals the num of redundant matrix systems */ 24*3f457be1SHong Zhang PetscInt color; /* color of processors in a subcommunicator */ 254b9ad928SBarry Smith } PC_Redundant; 264b9ad928SBarry Smith 274b9ad928SBarry Smith #undef __FUNCT__ 284b9ad928SBarry Smith #define __FUNCT__ "PCView_Redundant" 296849ba73SBarry Smith static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer) 304b9ad928SBarry Smith { 314b9ad928SBarry Smith PC_Redundant *red = (PC_Redundant*)pc->data; 32dfbe8321SBarry Smith PetscErrorCode ierr; 3313f74950SBarry Smith PetscMPIInt rank; 3432077d6dSBarry Smith PetscTruth iascii,isstring; 354b9ad928SBarry Smith PetscViewer sviewer; 364b9ad928SBarry Smith 374b9ad928SBarry Smith PetscFunctionBegin; 384b9ad928SBarry Smith ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 3932077d6dSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 404b9ad928SBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr); 4132077d6dSBarry Smith if (iascii) { 424b9ad928SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr); 434b9ad928SBarry Smith ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 444b9ad928SBarry Smith if (!rank) { 454b9ad928SBarry Smith ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 464b9ad928SBarry Smith ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 474b9ad928SBarry Smith ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 484b9ad928SBarry Smith } 494b9ad928SBarry Smith ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 504b9ad928SBarry Smith } else if (isstring) { 514b9ad928SBarry Smith ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr); 524b9ad928SBarry Smith ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 534b9ad928SBarry Smith if (!rank) { 544b9ad928SBarry Smith ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 554b9ad928SBarry Smith } 564b9ad928SBarry Smith ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 574b9ad928SBarry Smith } else { 5879a5c55eSBarry Smith SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name); 594b9ad928SBarry Smith } 604b9ad928SBarry Smith PetscFunctionReturn(0); 614b9ad928SBarry Smith } 624b9ad928SBarry Smith 63*3f457be1SHong Zhang #include "src/mat/matimpl.h" /*I "petscmat.h" I*/ 64*3f457be1SHong Zhang #include "private/vecimpl.h" 65*3f457be1SHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 66*3f457be1SHong Zhang #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 67*3f457be1SHong Zhang 68*3f457be1SHong Zhang #undef __FUNCT__ 69*3f457be1SHong Zhang #define __FUNCT__ "MatGetRedundantMatrix" 70*3f457be1SHong Zhang PetscErrorCode MatGetRedundantMatrix_AIJ(PC pc,Mat mat,MPI_Comm subcomm,PetscInt mlocal_sub,Mat *matredundant) 71*3f457be1SHong Zhang { 72*3f457be1SHong Zhang PetscMPIInt rank,size,subrank,subsize; 73*3f457be1SHong Zhang MPI_Comm comm=mat->comm; 74*3f457be1SHong Zhang PetscErrorCode ierr; 75*3f457be1SHong Zhang PC_Redundant *red=(PC_Redundant*)pc->data; 76*3f457be1SHong Zhang PetscInt nsubcomm=red->nsubcomm,nsends,nrecvs,i,prid=100,itmp; 77*3f457be1SHong Zhang PetscMPIInt *send_rank,*recv_rank; 78*3f457be1SHong Zhang PetscInt *rowrange=pc->pmat->rmap.range,mlocal_max,nzlocal; 79*3f457be1SHong Zhang Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 80*3f457be1SHong Zhang Mat A=aij->A,B=aij->B; 81*3f457be1SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 82*3f457be1SHong Zhang Mat C; 83*3f457be1SHong Zhang 84*3f457be1SHong Zhang PetscInt nleftover,np_subcomm,j; 85*3f457be1SHong Zhang PetscInt nz_A,nz_B,*sbuf_j; 86*3f457be1SHong Zhang PetscScalar *sbuf_a; 87*3f457be1SHong Zhang PetscInt cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 88*3f457be1SHong Zhang PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 89*3f457be1SHong Zhang PetscInt *cols,ctmp,lwrite,*rptr,l; 90*3f457be1SHong Zhang PetscScalar *vals,*aworkA,*aworkB; 91*3f457be1SHong Zhang 92*3f457be1SHong Zhang PetscMPIInt tag1,tag2,tag3,imdex; 93*3f457be1SHong Zhang MPI_Request *s_waits1,*s_waits2,*s_waits3,*r_waits1,*r_waits2,*r_waits3; 94*3f457be1SHong Zhang MPI_Status recv_status,*send_status; 95*3f457be1SHong Zhang PetscInt *sbuf_nz,*rbuf_nz,count; 96*3f457be1SHong Zhang 97*3f457be1SHong Zhang PetscInt **rbuf_j; 98*3f457be1SHong Zhang PetscScalar **rbuf_a; 99*3f457be1SHong Zhang 100*3f457be1SHong Zhang PetscFunctionBegin; 101*3f457be1SHong Zhang ierr = PetscOptionsGetInt(PETSC_NULL,"-prid",&prid,PETSC_NULL);CHKERRQ(ierr); 102*3f457be1SHong Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 103*3f457be1SHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 104*3f457be1SHong Zhang ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 105*3f457be1SHong Zhang ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 106*3f457be1SHong Zhang /* 107*3f457be1SHong Zhang ierr = PetscSynchronizedPrintf(comm, "[%d] subrank %d, subsize %d\n",rank,subrank,subsize); 108*3f457be1SHong Zhang ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 109*3f457be1SHong Zhang */ 110*3f457be1SHong Zhang /* get the destination processors */ 111*3f457be1SHong Zhang ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 112*3f457be1SHong Zhang recv_rank = send_rank + size; 113*3f457be1SHong Zhang np_subcomm = size/nsubcomm; 114*3f457be1SHong Zhang nleftover = size - nsubcomm*np_subcomm; 115*3f457be1SHong Zhang nsends = 0; nrecvs = 0; 116*3f457be1SHong Zhang for (i=0; i<size; i++){ /* i=rank*/ 117*3f457be1SHong Zhang if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 118*3f457be1SHong Zhang send_rank[nsends] = i; nsends++; 119*3f457be1SHong Zhang recv_rank[nrecvs++] = i; 120*3f457be1SHong Zhang } 121*3f457be1SHong Zhang } 122*3f457be1SHong Zhang if (rank >= size - nleftover){/* this proc is a leftover processor */ 123*3f457be1SHong Zhang i = size-nleftover-1; 124*3f457be1SHong Zhang j = 0; 125*3f457be1SHong Zhang while (j < nsubcomm - nleftover){ 126*3f457be1SHong Zhang send_rank[nsends++] = i; 127*3f457be1SHong Zhang i--; j++; 128*3f457be1SHong Zhang } 129*3f457be1SHong Zhang } 130*3f457be1SHong Zhang 131*3f457be1SHong Zhang if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 132*3f457be1SHong Zhang for (i=0; i<nleftover; i++){ 133*3f457be1SHong Zhang recv_rank[nrecvs++] = size-nleftover+i; 134*3f457be1SHong Zhang } 135*3f457be1SHong Zhang } 136*3f457be1SHong Zhang if (prid == rank){ 137*3f457be1SHong Zhang printf("[%d] sends to ",rank); 138*3f457be1SHong Zhang for (i=0; i<nsends; i++) printf(" [%d],",send_rank[i]); 139*3f457be1SHong Zhang printf(" \n"); 140*3f457be1SHong Zhang } 141*3f457be1SHong Zhang /* 142*3f457be1SHong Zhang ierr = PetscSynchronizedPrintf(comm, "[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs); 143*3f457be1SHong Zhang ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 144*3f457be1SHong Zhang ierr = MPI_Barrier(comm);CHKERRQ(ierr); 145*3f457be1SHong Zhang */ 146*3f457be1SHong Zhang 147*3f457be1SHong Zhang /* get this processor's nzlocal=nz_A+nz_B */ 148*3f457be1SHong Zhang nz_A = a->nz; nz_B = b->nz; 149*3f457be1SHong Zhang nzlocal = nz_A + nz_B; 150*3f457be1SHong Zhang 151*3f457be1SHong Zhang /* allocate sbuf_j, sbuf_a, then copy mat's local entries into the buffers */ 152*3f457be1SHong Zhang itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 153*3f457be1SHong Zhang ierr = PetscMalloc(itmp*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 154*3f457be1SHong Zhang ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 155*3f457be1SHong Zhang 156*3f457be1SHong Zhang rptr = sbuf_j; 157*3f457be1SHong Zhang cols = sbuf_j + rend-rstart + 1; 158*3f457be1SHong Zhang vals = sbuf_a; 159*3f457be1SHong Zhang rptr[0] = 0; 160*3f457be1SHong Zhang for (i=0; i<rend-rstart; i++){ 161*3f457be1SHong Zhang row = i + rstart; 162*3f457be1SHong Zhang if (rank == prid) printf(" \n row %d: ",row); 163*3f457be1SHong Zhang nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 164*3f457be1SHong Zhang ncols = nzA + nzB; 165*3f457be1SHong Zhang cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 166*3f457be1SHong Zhang aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 167*3f457be1SHong Zhang /* load the column indices for this row into cols */ 168*3f457be1SHong Zhang lwrite = 0; 169*3f457be1SHong Zhang for (l=0; l<nzB; l++) { 170*3f457be1SHong Zhang if ((ctmp = bmap[cworkB[l]]) < cstart){ 171*3f457be1SHong Zhang vals[lwrite] = aworkB[l]; 172*3f457be1SHong Zhang cols[lwrite++] = ctmp; 173*3f457be1SHong Zhang if (rank == prid) printf(" (%d,%g)",ctmp,aworkB[l]); 174*3f457be1SHong Zhang } 175*3f457be1SHong Zhang } 176*3f457be1SHong Zhang for (l=0; l<nzA; l++){ 177*3f457be1SHong Zhang vals[lwrite] = aworkA[l]; 178*3f457be1SHong Zhang cols[lwrite++] = cstart + cworkA[l]; 179*3f457be1SHong Zhang if (rank == prid) printf(" (%d,%g)",cstart + cworkA[l],aworkA[l]); 180*3f457be1SHong Zhang } 181*3f457be1SHong Zhang for (l=0; l<nzB; l++) { 182*3f457be1SHong Zhang if ((ctmp = bmap[cworkB[l]]) >= cend){ 183*3f457be1SHong Zhang vals[lwrite] = aworkB[l]; 184*3f457be1SHong Zhang cols[lwrite++] = ctmp; 185*3f457be1SHong Zhang if (rank == prid) printf(" (%d,%g)",ctmp,aworkB[l]); 186*3f457be1SHong Zhang } 187*3f457be1SHong Zhang } 188*3f457be1SHong Zhang /* insert local matrix values into C */ 189*3f457be1SHong Zhang //ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 190*3f457be1SHong Zhang 191*3f457be1SHong Zhang vals += ncols; 192*3f457be1SHong Zhang cols += ncols; 193*3f457be1SHong Zhang rptr[i+1] = rptr[i] + ncols; 194*3f457be1SHong Zhang } 195*3f457be1SHong Zhang /* 196*3f457be1SHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 197*3f457be1SHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 198*3f457be1SHong Zhang */ 199*3f457be1SHong Zhang if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 200*3f457be1SHong Zhang 201*3f457be1SHong Zhang /* send nzlocal to others, and recv other's nzlocal */ 202*3f457be1SHong Zhang /*--------------------------------------------------*/ 203*3f457be1SHong Zhang ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 204*3f457be1SHong Zhang rbuf_nz = sbuf_nz + nsends; 205*3f457be1SHong Zhang 206*3f457be1SHong Zhang ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits1,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 207*3f457be1SHong Zhang s_waits2 = s_waits1 + nsends; 208*3f457be1SHong Zhang s_waits3 = s_waits2 + nsends; 209*3f457be1SHong Zhang r_waits1 = s_waits3 + nsends; 210*3f457be1SHong Zhang r_waits2 = r_waits1 + nrecvs; 211*3f457be1SHong Zhang r_waits3 = r_waits2 + nrecvs; 212*3f457be1SHong Zhang 213*3f457be1SHong Zhang /* get some new tags to keep the communication clean */ 214*3f457be1SHong Zhang ierr = PetscObjectGetNewTag((PetscObject)A,&tag1);CHKERRQ(ierr); 215*3f457be1SHong Zhang ierr = PetscObjectGetNewTag((PetscObject)A,&tag2);CHKERRQ(ierr); 216*3f457be1SHong Zhang ierr = PetscObjectGetNewTag((PetscObject)A,&tag3);CHKERRQ(ierr); 217*3f457be1SHong Zhang 218*3f457be1SHong Zhang /* post receives of other's nzlocal */ 219*3f457be1SHong Zhang for (i=0; i<nrecvs; i++){ 220*3f457be1SHong Zhang ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 221*3f457be1SHong Zhang } 222*3f457be1SHong Zhang 223*3f457be1SHong Zhang /* send nzlocal to others */ 224*3f457be1SHong Zhang for (i=0; i<nsends; i++){ 225*3f457be1SHong Zhang sbuf_nz[i] = nzlocal; 226*3f457be1SHong Zhang ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 227*3f457be1SHong Zhang if (prid == rank) printf(" [%d] sends nz %d to [%d]\n",rank,nzlocal,send_rank[i]); 228*3f457be1SHong Zhang } 229*3f457be1SHong Zhang 230*3f457be1SHong Zhang /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 231*3f457be1SHong Zhang count = nrecvs; 232*3f457be1SHong Zhang while (count) { 233*3f457be1SHong Zhang ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 234*3f457be1SHong Zhang recv_rank[imdex] = recv_status.MPI_SOURCE; 235*3f457be1SHong Zhang /* allocate rbuf_a and rbuf_j; then post receives of rbuf_a and rbuf_j */ 236*3f457be1SHong Zhang ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 237*3f457be1SHong Zhang ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_status.MPI_SOURCE,tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 238*3f457be1SHong Zhang 239*3f457be1SHong Zhang itmp = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 240*3f457be1SHong Zhang rbuf_nz[imdex] += itmp+2; 241*3f457be1SHong Zhang ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 242*3f457be1SHong Zhang ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 243*3f457be1SHong Zhang count--; 244*3f457be1SHong Zhang } 245*3f457be1SHong Zhang 246*3f457be1SHong Zhang /* wait on sends of nzlocal */ 247*3f457be1SHong Zhang if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 248*3f457be1SHong Zhang 249*3f457be1SHong Zhang /* send mat->i,j and mat->a to others, and recv from other's */ 250*3f457be1SHong Zhang /*-----------------------------------------------------------*/ 251*3f457be1SHong Zhang for (i=0; i<nsends; i++){ 252*3f457be1SHong Zhang ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 253*3f457be1SHong Zhang itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 254*3f457be1SHong Zhang ierr = MPI_Isend(sbuf_j,itmp,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 255*3f457be1SHong Zhang } 256*3f457be1SHong Zhang 257*3f457be1SHong Zhang /* wait on receives of mat->i,j and mat->a */ 258*3f457be1SHong Zhang /*-----------------------------------------*/ 259*3f457be1SHong Zhang count = nrecvs; 260*3f457be1SHong Zhang while (count) { 261*3f457be1SHong Zhang ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 262*3f457be1SHong Zhang if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 263*3f457be1SHong Zhang ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 264*3f457be1SHong Zhang count--; 265*3f457be1SHong Zhang } 266*3f457be1SHong Zhang 267*3f457be1SHong Zhang /* wait on sends of mat->i,j and mat->a */ 268*3f457be1SHong Zhang /*--------------------------------------*/ 269*3f457be1SHong Zhang if (nsends) { 270*3f457be1SHong Zhang ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 271*3f457be1SHong Zhang ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 272*3f457be1SHong Zhang } 273*3f457be1SHong Zhang ierr = PetscFree(sbuf_nz);CHKERRQ(ierr); 274*3f457be1SHong Zhang ierr = PetscFree2(s_waits1,send_status);CHKERRQ(ierr); 275*3f457be1SHong Zhang 276*3f457be1SHong Zhang /* create redundant matrix */ 277*3f457be1SHong Zhang /*-------------------------*/ 278*3f457be1SHong Zhang ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 279*3f457be1SHong Zhang ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 280*3f457be1SHong Zhang ierr = MatSetFromOptions(C);CHKERRQ(ierr); 281*3f457be1SHong Zhang 282*3f457be1SHong Zhang /* insert local matrix entries */ 283*3f457be1SHong Zhang rptr = sbuf_j; 284*3f457be1SHong Zhang cols = sbuf_j + rend-rstart + 1; 285*3f457be1SHong Zhang vals = sbuf_a; 286*3f457be1SHong Zhang for (i=0; i<rend-rstart; i++){ 287*3f457be1SHong Zhang row = i + rstart; 288*3f457be1SHong Zhang ncols = rptr[i+1] - rptr[i]; 289*3f457be1SHong Zhang ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 290*3f457be1SHong Zhang vals += ncols; 291*3f457be1SHong Zhang cols += ncols; 292*3f457be1SHong Zhang } 293*3f457be1SHong Zhang /* insert received matrix entries */ 294*3f457be1SHong Zhang for (imdex=0; imdex<nrecvs; imdex++){ 295*3f457be1SHong Zhang 296*3f457be1SHong Zhang rstart = rowrange[recv_rank[imdex]]; 297*3f457be1SHong Zhang rend = rowrange[recv_rank[imdex]+1]; 298*3f457be1SHong Zhang rptr = rbuf_j[imdex]; 299*3f457be1SHong Zhang cols = rbuf_j[imdex] + rend-rstart + 1; 300*3f457be1SHong Zhang vals = rbuf_a[imdex]; 301*3f457be1SHong Zhang for (i=0; i<rend-rstart; i++){ 302*3f457be1SHong Zhang row = i + rstart; 303*3f457be1SHong Zhang ncols = rptr[i+1] - rptr[i]; 304*3f457be1SHong Zhang ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 305*3f457be1SHong Zhang vals += ncols; 306*3f457be1SHong Zhang cols += ncols; 307*3f457be1SHong Zhang } 308*3f457be1SHong Zhang } 309*3f457be1SHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 310*3f457be1SHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 311*3f457be1SHong Zhang ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 312*3f457be1SHong Zhang if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 313*3f457be1SHong Zhang *matredundant = C; 314*3f457be1SHong Zhang 315*3f457be1SHong Zhang /* free space */ 316*3f457be1SHong Zhang ierr = PetscFree(send_rank);CHKERRQ(ierr); 317*3f457be1SHong Zhang ierr = PetscFree(sbuf_j);CHKERRQ(ierr); 318*3f457be1SHong Zhang ierr = PetscFree(sbuf_a);CHKERRQ(ierr); 319*3f457be1SHong Zhang for (i=0; i<nrecvs; i++){ 320*3f457be1SHong Zhang ierr = PetscFree(rbuf_j[i]);CHKERRQ(ierr); 321*3f457be1SHong Zhang ierr = PetscFree(rbuf_a[i]);CHKERRQ(ierr); 322*3f457be1SHong Zhang } 323*3f457be1SHong Zhang ierr = PetscFree3(sbuf_nz,rbuf_j,rbuf_a);CHKERRQ(ierr); 324*3f457be1SHong Zhang PetscFunctionReturn(0); 325*3f457be1SHong Zhang } 326*3f457be1SHong Zhang 3274b9ad928SBarry Smith #undef __FUNCT__ 3284b9ad928SBarry Smith #define __FUNCT__ "PCSetUp_Redundant" 3296849ba73SBarry Smith static PetscErrorCode PCSetUp_Redundant(PC pc) 3304b9ad928SBarry Smith { 3314b9ad928SBarry Smith PC_Redundant *red = (PC_Redundant*)pc->data; 332dfbe8321SBarry Smith PetscErrorCode ierr; 333*3f457be1SHong Zhang PetscInt mstart,mend,mlocal,m; 33413f74950SBarry Smith PetscMPIInt size; 3354b9ad928SBarry Smith IS isl; 3364b9ad928SBarry Smith MatReuse reuse = MAT_INITIAL_MATRIX; 3374b9ad928SBarry Smith MatStructure str = DIFFERENT_NONZERO_PATTERN; 3384b9ad928SBarry Smith MPI_Comm comm; 33923ce1328SBarry Smith Vec vec; 3404b9ad928SBarry Smith 341*3f457be1SHong Zhang PetscMPIInt rank,size_sub,itmp; 342*3f457be1SHong Zhang PetscInt mlocal_sub; 343*3f457be1SHong Zhang PetscMPIInt subsize,subrank; 344*3f457be1SHong Zhang PetscInt rstart_sub,rend_sub,mloc_sub; 345*3f457be1SHong Zhang Mat Aredundant; 346*3f457be1SHong Zhang 3474b9ad928SBarry Smith PetscFunctionBegin; 348*3f457be1SHong Zhang ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr); 34923ce1328SBarry Smith ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr); 3504b9ad928SBarry Smith ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 35123ce1328SBarry Smith ierr = VecGetSize(vec,&m);CHKERRQ(ierr); 3524b9ad928SBarry Smith if (!pc->setupcalled) { 353*3f457be1SHong Zhang /* create working vectors xsub/ysub and xdup/ydup */ 35423ce1328SBarry Smith ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr); 355*3f457be1SHong Zhang ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr); 3564b9ad928SBarry Smith 357*3f457be1SHong Zhang #ifdef INTER_COLOR 358*3f457be1SHong Zhang /* get local size of xsub/ysub */ 359*3f457be1SHong Zhang ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr); 360*3f457be1SHong Zhang ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr); 361*3f457be1SHong Zhang rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */ 362*3f457be1SHong Zhang if (subrank+1 < subsize){ 363*3f457be1SHong Zhang rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)]; 364*3f457be1SHong Zhang } else { 365*3f457be1SHong Zhang rend_sub = m; 366*3f457be1SHong Zhang } 367*3f457be1SHong Zhang mloc_sub = rend_sub - rstart_sub; 368*3f457be1SHong Zhang ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr); 369*3f457be1SHong Zhang /* create xsub with empty local arrays, because xdup's arrays will be placed into it */ 370*3f457be1SHong Zhang ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr); 371*3f457be1SHong Zhang #endif 372*3f457be1SHong Zhang #ifdef CONTIGUOUS_COLOR 373*3f457be1SHong Zhang ierr = VecCreateMPI(red->subcomm,PETSC_DECIDE,m,&red->ysub);CHKERRQ(ierr); 374*3f457be1SHong Zhang ierr = VecGetLocalSize(red->ysub,&mloc_sub);CHKERRQ(ierr); 375*3f457be1SHong Zhang ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,m,PETSC_NULL,&red->xsub);CHKERRQ(ierr); 376*3f457be1SHong Zhang #endif 377*3f457be1SHong Zhang 378*3f457be1SHong Zhang /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it. 379*3f457be1SHong Zhang Note: we use communicator dupcomm, not pc->comm! */ 380*3f457be1SHong Zhang ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr); 381*3f457be1SHong Zhang ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr); 382*3f457be1SHong Zhang 383*3f457be1SHong Zhang /* create vec scatters */ 384*3f457be1SHong Zhang if (!red->scatterin){ 385*3f457be1SHong Zhang IS is1,is2; 386*3f457be1SHong Zhang PetscInt *idx1,*idx2,i,j,k; 387*3f457be1SHong Zhang ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr); 388*3f457be1SHong Zhang idx2 = idx1 + red->nsubcomm*mlocal; 389*3f457be1SHong Zhang j = 0; 390*3f457be1SHong Zhang for (k=0; k<red->nsubcomm; k++){ 391*3f457be1SHong Zhang for (i=mstart; i<mend; i++){ 392*3f457be1SHong Zhang idx1[j] = i; 393*3f457be1SHong Zhang idx2[j++] = i + m*k; 394*3f457be1SHong Zhang } 395*3f457be1SHong Zhang } 396*3f457be1SHong Zhang ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr); 397*3f457be1SHong Zhang ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr); 398*3f457be1SHong Zhang ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr); 399*3f457be1SHong Zhang ierr = ISDestroy(is1);CHKERRQ(ierr); 400*3f457be1SHong Zhang ierr = ISDestroy(is2);CHKERRQ(ierr); 401*3f457be1SHong Zhang 402*3f457be1SHong Zhang ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr); 403*3f457be1SHong Zhang ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr); 404*3f457be1SHong Zhang ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr); 405*3f457be1SHong Zhang ierr = ISDestroy(is1);CHKERRQ(ierr); 406*3f457be1SHong Zhang ierr = ISDestroy(is2);CHKERRQ(ierr); 407*3f457be1SHong Zhang ierr = PetscFree(idx1);CHKERRQ(ierr); 4084b9ad928SBarry Smith } 4094b9ad928SBarry Smith } 41023ce1328SBarry Smith ierr = VecDestroy(vec);CHKERRQ(ierr); 4114b9ad928SBarry Smith 4124b9ad928SBarry Smith /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */ 413*3f457be1SHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4144b9ad928SBarry Smith if (size == 1) { 4154b9ad928SBarry Smith red->useparallelmat = PETSC_FALSE; 4164b9ad928SBarry Smith } 4174b9ad928SBarry Smith 4184b9ad928SBarry Smith if (red->useparallelmat) { 4194b9ad928SBarry Smith if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) { 4204b9ad928SBarry Smith /* destroy old matrices */ 4214b9ad928SBarry Smith if (red->pmats) { 4224b9ad928SBarry Smith ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr); 4234b9ad928SBarry Smith } 4244b9ad928SBarry Smith } else if (pc->setupcalled == 1) { 4254b9ad928SBarry Smith reuse = MAT_REUSE_MATRIX; 4264b9ad928SBarry Smith str = SAME_NONZERO_PATTERN; 4274b9ad928SBarry Smith } 4284b9ad928SBarry Smith 429*3f457be1SHong Zhang /* ================== matrix ============= */ 430*3f457be1SHong Zhang ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr); 431*3f457be1SHong Zhang ierr = MatGetRedundantMatrix_AIJ(pc,pc->pmat,red->subcomm,mlocal_sub,&Aredundant);CHKERRQ(ierr); 432*3f457be1SHong Zhang 433*3f457be1SHong Zhang /* grab the parallel matrix and put it into processors of a subcomminicator */ 4344b9ad928SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);CHKERRQ(ierr); 4354b9ad928SBarry Smith ierr = MatGetSubMatrices(pc->pmat,1,&isl,&isl,reuse,&red->pmats);CHKERRQ(ierr); 4364b9ad928SBarry Smith ierr = ISDestroy(isl);CHKERRQ(ierr); 4374b9ad928SBarry Smith 438*3f457be1SHong Zhang /* ------- temporarily set a mpi matrix pmats_sub- provided by user! --*/ 439*3f457be1SHong Zhang ierr = MatCreate(red->subcomm,&red->pmats_sub);CHKERRQ(ierr); 440*3f457be1SHong Zhang ierr = MatSetSizes(red->pmats_sub,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 441*3f457be1SHong Zhang ierr = MatSetFromOptions(red->pmats_sub);CHKERRQ(ierr); 442*3f457be1SHong Zhang { 443*3f457be1SHong Zhang PetscInt Istart,Iend,ncols,i; 444*3f457be1SHong Zhang const PetscInt *cols; 445*3f457be1SHong Zhang const PetscScalar *vals; 446*3f457be1SHong Zhang PetscTruth flg; 447*3f457be1SHong Zhang ierr = MatGetOwnershipRange(red->pmats_sub,&Istart,&Iend);CHKERRQ(ierr); 448*3f457be1SHong Zhang for (i=Istart; i<Iend; i++) { 449*3f457be1SHong Zhang ierr = MatGetRow(red->pmats[0],i,&ncols,&cols,&vals);CHKERRQ(ierr); 450*3f457be1SHong Zhang ierr = MatSetValues(red->pmats_sub,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 451*3f457be1SHong Zhang ierr = MatRestoreRow(red->pmats[0],i,&ncols,&cols,&vals);CHKERRQ(ierr); 452*3f457be1SHong Zhang } 453*3f457be1SHong Zhang ierr = MatAssemblyBegin(red->pmats_sub,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 454*3f457be1SHong Zhang ierr = MatAssemblyEnd(red->pmats_sub,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 455*3f457be1SHong Zhang 456*3f457be1SHong Zhang ierr = MatEqual(red->pmats_sub,Aredundant,&flg);CHKERRQ(ierr); 457*3f457be1SHong Zhang if (!flg) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"pmats_sub !=Aredundant "); 458*3f457be1SHong Zhang ierr = MatDestroy(red->pmats_sub); 459*3f457be1SHong Zhang } 460*3f457be1SHong Zhang red->pmats_sub = Aredundant; 461*3f457be1SHong Zhang 462*3f457be1SHong Zhang /* tell PC of the subcommunicator its operators */ 463*3f457be1SHong Zhang ierr = PCSetOperators(red->pc,red->pmats_sub,red->pmats_sub,str);CHKERRQ(ierr); 4644b9ad928SBarry Smith } else { 4654b9ad928SBarry Smith ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr); 4664b9ad928SBarry Smith } 4674b9ad928SBarry Smith ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 4684b9ad928SBarry Smith ierr = PCSetUp(red->pc);CHKERRQ(ierr); 4694b9ad928SBarry Smith PetscFunctionReturn(0); 4704b9ad928SBarry Smith } 4714b9ad928SBarry Smith 4724b9ad928SBarry Smith 4734b9ad928SBarry Smith #undef __FUNCT__ 4744b9ad928SBarry Smith #define __FUNCT__ "PCApply_Redundant" 4756849ba73SBarry Smith static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y) 4764b9ad928SBarry Smith { 4774b9ad928SBarry Smith PC_Redundant *red = (PC_Redundant*)pc->data; 478dfbe8321SBarry Smith PetscErrorCode ierr; 479*3f457be1SHong Zhang PetscScalar *array; 4804b9ad928SBarry Smith 4814b9ad928SBarry Smith PetscFunctionBegin; 482*3f457be1SHong Zhang /* scatter x to xdup */ 483*3f457be1SHong Zhang ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 484*3f457be1SHong Zhang ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 485*3f457be1SHong Zhang 486*3f457be1SHong Zhang /* place xdup's local array into xsub */ 487*3f457be1SHong Zhang ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr); 488*3f457be1SHong Zhang ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr); 4894b9ad928SBarry Smith 4904b9ad928SBarry Smith /* apply preconditioner on each processor */ 491*3f457be1SHong Zhang ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr); 492*3f457be1SHong Zhang ierr = VecResetArray(red->xsub);CHKERRQ(ierr); 493*3f457be1SHong Zhang ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr); 4944b9ad928SBarry Smith 495*3f457be1SHong Zhang /* place ysub's local array into ydup */ 496*3f457be1SHong Zhang ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr); 497*3f457be1SHong Zhang ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr); 498*3f457be1SHong Zhang 499*3f457be1SHong Zhang /* scatter ydup to y */ 500*3f457be1SHong Zhang ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 501*3f457be1SHong Zhang ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 502*3f457be1SHong Zhang ierr = VecResetArray(red->ydup);CHKERRQ(ierr); 503*3f457be1SHong Zhang ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr); 5044b9ad928SBarry Smith PetscFunctionReturn(0); 5054b9ad928SBarry Smith } 5064b9ad928SBarry Smith 5074b9ad928SBarry Smith #undef __FUNCT__ 5084b9ad928SBarry Smith #define __FUNCT__ "PCDestroy_Redundant" 5096849ba73SBarry Smith static PetscErrorCode PCDestroy_Redundant(PC pc) 5104b9ad928SBarry Smith { 5114b9ad928SBarry Smith PC_Redundant *red = (PC_Redundant*)pc->data; 512dfbe8321SBarry Smith PetscErrorCode ierr; 5134b9ad928SBarry Smith 5144b9ad928SBarry Smith PetscFunctionBegin; 5154b9ad928SBarry Smith if (red->scatterin) {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);} 5164b9ad928SBarry Smith if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);} 517*3f457be1SHong Zhang if (red->ysub) {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);} 518*3f457be1SHong Zhang if (red->xsub) {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);} 519*3f457be1SHong Zhang if (red->xdup) {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);} 520*3f457be1SHong Zhang if (red->ydup) {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);} 521*3f457be1SHong Zhang if (red->pmats_sub) { 522*3f457be1SHong Zhang ierr = MatDestroy(red->pmats_sub);CHKERRQ(ierr); 523*3f457be1SHong Zhang } 524*3f457be1SHong Zhang 5254b9ad928SBarry Smith if (red->pmats) { 5264b9ad928SBarry Smith ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr); 5274b9ad928SBarry Smith } 5284b9ad928SBarry Smith ierr = PCDestroy(red->pc);CHKERRQ(ierr); 5294b9ad928SBarry Smith ierr = PetscFree(red);CHKERRQ(ierr); 5304b9ad928SBarry Smith PetscFunctionReturn(0); 5314b9ad928SBarry Smith } 5324b9ad928SBarry Smith 5334b9ad928SBarry Smith #undef __FUNCT__ 5344b9ad928SBarry Smith #define __FUNCT__ "PCSetFromOptions_Redundant" 5356849ba73SBarry Smith static PetscErrorCode PCSetFromOptions_Redundant(PC pc) 5364b9ad928SBarry Smith { 5374b9ad928SBarry Smith PetscFunctionBegin; 5384b9ad928SBarry Smith PetscFunctionReturn(0); 5394b9ad928SBarry Smith } 5404b9ad928SBarry Smith 5414b9ad928SBarry Smith EXTERN_C_BEGIN 5424b9ad928SBarry Smith #undef __FUNCT__ 5434b9ad928SBarry Smith #define __FUNCT__ "PCRedundantSetScatter_Redundant" 544dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out) 5454b9ad928SBarry Smith { 5464b9ad928SBarry Smith PC_Redundant *red = (PC_Redundant*)pc->data; 547dfbe8321SBarry Smith PetscErrorCode ierr; 5484b9ad928SBarry Smith 5494b9ad928SBarry Smith PetscFunctionBegin; 5504b9ad928SBarry Smith red->scatterin = in; 5514b9ad928SBarry Smith red->scatterout = out; 5524b9ad928SBarry Smith ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr); 5534b9ad928SBarry Smith ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr); 5544b9ad928SBarry Smith PetscFunctionReturn(0); 5554b9ad928SBarry Smith } 5564b9ad928SBarry Smith EXTERN_C_END 5574b9ad928SBarry Smith 5584b9ad928SBarry Smith #undef __FUNCT__ 5594b9ad928SBarry Smith #define __FUNCT__ "PCRedundantSetScatter" 5604b9ad928SBarry Smith /*@ 5614b9ad928SBarry Smith PCRedundantSetScatter - Sets the scatter used to copy values into the 5624b9ad928SBarry Smith redundant local solve and the scatter to move them back into the global 5634b9ad928SBarry Smith vector. 5644b9ad928SBarry Smith 5654b9ad928SBarry Smith Collective on PC 5664b9ad928SBarry Smith 5674b9ad928SBarry Smith Input Parameters: 5684b9ad928SBarry Smith + pc - the preconditioner context 5694b9ad928SBarry Smith . in - the scatter to move the values in 5704b9ad928SBarry Smith - out - the scatter to move them out 5714b9ad928SBarry Smith 5724b9ad928SBarry Smith Level: advanced 5734b9ad928SBarry Smith 5744b9ad928SBarry Smith .keywords: PC, redundant solve 5754b9ad928SBarry Smith @*/ 576dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out) 5774b9ad928SBarry Smith { 578dfbe8321SBarry Smith PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter); 5794b9ad928SBarry Smith 5804b9ad928SBarry Smith PetscFunctionBegin; 5814482741eSBarry Smith PetscValidHeaderSpecific(pc,PC_COOKIE,1); 5824482741eSBarry Smith PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2); 5834482741eSBarry Smith PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3); 5844b9ad928SBarry Smith ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr); 5854b9ad928SBarry Smith if (f) { 5864b9ad928SBarry Smith ierr = (*f)(pc,in,out);CHKERRQ(ierr); 5874b9ad928SBarry Smith } 5884b9ad928SBarry Smith PetscFunctionReturn(0); 5894b9ad928SBarry Smith } 5904b9ad928SBarry Smith 5914b9ad928SBarry Smith EXTERN_C_BEGIN 5924b9ad928SBarry Smith #undef __FUNCT__ 5934b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetPC_Redundant" 594dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc) 5954b9ad928SBarry Smith { 5964b9ad928SBarry Smith PC_Redundant *red = (PC_Redundant*)pc->data; 5974b9ad928SBarry Smith 5984b9ad928SBarry Smith PetscFunctionBegin; 5994b9ad928SBarry Smith *innerpc = red->pc; 6004b9ad928SBarry Smith PetscFunctionReturn(0); 6014b9ad928SBarry Smith } 6024b9ad928SBarry Smith EXTERN_C_END 6034b9ad928SBarry Smith 6044b9ad928SBarry Smith #undef __FUNCT__ 6054b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetPC" 6064b9ad928SBarry Smith /*@ 6074b9ad928SBarry Smith PCRedundantGetPC - Gets the sequential PC created by the redundant PC. 6084b9ad928SBarry Smith 6094b9ad928SBarry Smith Not Collective 6104b9ad928SBarry Smith 6114b9ad928SBarry Smith Input Parameter: 6124b9ad928SBarry Smith . pc - the preconditioner context 6134b9ad928SBarry Smith 6144b9ad928SBarry Smith Output Parameter: 6154b9ad928SBarry Smith . innerpc - the sequential PC 6164b9ad928SBarry Smith 6174b9ad928SBarry Smith Level: advanced 6184b9ad928SBarry Smith 6194b9ad928SBarry Smith .keywords: PC, redundant solve 6204b9ad928SBarry Smith @*/ 621dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc) 6224b9ad928SBarry Smith { 623dfbe8321SBarry Smith PetscErrorCode ierr,(*f)(PC,PC*); 6244b9ad928SBarry Smith 6254b9ad928SBarry Smith PetscFunctionBegin; 6264482741eSBarry Smith PetscValidHeaderSpecific(pc,PC_COOKIE,1); 6274482741eSBarry Smith PetscValidPointer(innerpc,2); 6284b9ad928SBarry Smith ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr); 6294b9ad928SBarry Smith if (f) { 6304b9ad928SBarry Smith ierr = (*f)(pc,innerpc);CHKERRQ(ierr); 6314b9ad928SBarry Smith } 6324b9ad928SBarry Smith PetscFunctionReturn(0); 6334b9ad928SBarry Smith } 6344b9ad928SBarry Smith 6354b9ad928SBarry Smith EXTERN_C_BEGIN 6364b9ad928SBarry Smith #undef __FUNCT__ 6374b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetOperators_Redundant" 638dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat) 6394b9ad928SBarry Smith { 6404b9ad928SBarry Smith PC_Redundant *red = (PC_Redundant*)pc->data; 6414b9ad928SBarry Smith 6424b9ad928SBarry Smith PetscFunctionBegin; 6434b9ad928SBarry Smith if (mat) *mat = red->pmats[0]; 6444b9ad928SBarry Smith if (pmat) *pmat = red->pmats[0]; 6454b9ad928SBarry Smith PetscFunctionReturn(0); 6464b9ad928SBarry Smith } 6474b9ad928SBarry Smith EXTERN_C_END 6484b9ad928SBarry Smith 6494b9ad928SBarry Smith #undef __FUNCT__ 6504b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetOperators" 6514b9ad928SBarry Smith /*@ 6524b9ad928SBarry Smith PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix 6534b9ad928SBarry Smith 6544b9ad928SBarry Smith Not Collective 6554b9ad928SBarry Smith 6564b9ad928SBarry Smith Input Parameter: 6574b9ad928SBarry Smith . pc - the preconditioner context 6584b9ad928SBarry Smith 6594b9ad928SBarry Smith Output Parameters: 6604b9ad928SBarry Smith + mat - the matrix 6614b9ad928SBarry Smith - pmat - the (possibly different) preconditioner matrix 6624b9ad928SBarry Smith 6634b9ad928SBarry Smith Level: advanced 6644b9ad928SBarry Smith 6654b9ad928SBarry Smith .keywords: PC, redundant solve 6664b9ad928SBarry Smith @*/ 667dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat) 6684b9ad928SBarry Smith { 669dfbe8321SBarry Smith PetscErrorCode ierr,(*f)(PC,Mat*,Mat*); 6704b9ad928SBarry Smith 6714b9ad928SBarry Smith PetscFunctionBegin; 6724482741eSBarry Smith PetscValidHeaderSpecific(pc,PC_COOKIE,1); 6734482741eSBarry Smith if (mat) PetscValidPointer(mat,2); 6744482741eSBarry Smith if (pmat) PetscValidPointer(pmat,3); 6754b9ad928SBarry Smith ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr); 6764b9ad928SBarry Smith if (f) { 6774b9ad928SBarry Smith ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr); 6784b9ad928SBarry Smith } 6794b9ad928SBarry Smith PetscFunctionReturn(0); 6804b9ad928SBarry Smith } 6814b9ad928SBarry Smith 6824b9ad928SBarry Smith /* -------------------------------------------------------------------------------------*/ 68337a17b4dSBarry Smith /*MC 68437a17b4dSBarry Smith PCREDUNDANT - Runs a preconditioner for the entire problem on each processor 68537a17b4dSBarry Smith 68637a17b4dSBarry Smith 68737a17b4dSBarry Smith Options for the redundant preconditioners can be set with -redundant_pc_xxx 68837a17b4dSBarry Smith 68937a17b4dSBarry Smith Level: intermediate 69037a17b4dSBarry Smith 69137a17b4dSBarry Smith 69237a17b4dSBarry Smith .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(), 69337a17b4dSBarry Smith PCRedundantGetPC(), PCRedundantGetOperators() 69437a17b4dSBarry Smith 69537a17b4dSBarry Smith M*/ 69637a17b4dSBarry Smith 6974b9ad928SBarry Smith EXTERN_C_BEGIN 6984b9ad928SBarry Smith #undef __FUNCT__ 6994b9ad928SBarry Smith #define __FUNCT__ "PCCreate_Redundant" 700dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc) 7014b9ad928SBarry Smith { 702dfbe8321SBarry Smith PetscErrorCode ierr; 7034b9ad928SBarry Smith PC_Redundant *red; 704e060cb09SBarry Smith const char *prefix; 7054b9ad928SBarry Smith 706*3f457be1SHong Zhang PetscInt nsubcomm,np_subcomm,nleftover,i,j,color; 707*3f457be1SHong Zhang PetscMPIInt rank,size,subrank,*subsize; 708*3f457be1SHong Zhang MPI_Comm subcomm; 709*3f457be1SHong Zhang PetscMPIInt duprank; 710*3f457be1SHong Zhang PetscMPIInt rank_dup,size_dup; 711*3f457be1SHong Zhang MPI_Comm dupcomm; 712*3f457be1SHong Zhang 7134b9ad928SBarry Smith PetscFunctionBegin; 7144b9ad928SBarry Smith ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr); 71552e6d16bSBarry Smith ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr); 7164b9ad928SBarry Smith red->useparallelmat = PETSC_TRUE; 7174b9ad928SBarry Smith 718*3f457be1SHong Zhang ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 719*3f457be1SHong Zhang ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr); 720*3f457be1SHong Zhang nsubcomm = size; 721*3f457be1SHong Zhang ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr); 722*3f457be1SHong Zhang if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size); 723*3f457be1SHong Zhang 724*3f457be1SHong Zhang /* get size of each subcommunicators */ 725*3f457be1SHong Zhang ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr); 726*3f457be1SHong Zhang np_subcomm = size/nsubcomm; 727*3f457be1SHong Zhang nleftover = size - nsubcomm*np_subcomm; 728*3f457be1SHong Zhang for (i=0; i<nsubcomm; i++){ 729*3f457be1SHong Zhang subsize[i] = np_subcomm; 730*3f457be1SHong Zhang if (i<nleftover) subsize[i]++; 731*3f457be1SHong Zhang } 732*3f457be1SHong Zhang 733*3f457be1SHong Zhang /* find color for this proc */ 734*3f457be1SHong Zhang #ifdef INTER_COLOR 735*3f457be1SHong Zhang color = rank%nsubcomm; 736*3f457be1SHong Zhang subrank = rank/nsubcomm; 737*3f457be1SHong Zhang #endif 738*3f457be1SHong Zhang 739*3f457be1SHong Zhang #ifdef CONTIGUOUS_COLOR 740*3f457be1SHong Zhang color = 0; subrank = 0; i = 0; j=0; 741*3f457be1SHong Zhang while (i < size){ 742*3f457be1SHong Zhang if (rank == i) break; /* my color is found */ 743*3f457be1SHong Zhang if (j >= subsize[color]-1){ /* next subcomm */ 744*3f457be1SHong Zhang j = -1; subrank = -1; color++; 745*3f457be1SHong Zhang } 746*3f457be1SHong Zhang i++; j++; subrank++; 747*3f457be1SHong Zhang } 748*3f457be1SHong Zhang #endif 749*3f457be1SHong Zhang 750*3f457be1SHong Zhang ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr); 751*3f457be1SHong Zhang red->subcomm = subcomm; 752*3f457be1SHong Zhang red->color = color; 753*3f457be1SHong Zhang red->nsubcomm = nsubcomm; 754*3f457be1SHong Zhang 755*3f457be1SHong Zhang j = 0; duprank = 0; 756*3f457be1SHong Zhang for (i=0; i<nsubcomm; i++){ 757*3f457be1SHong Zhang if (j == color){ 758*3f457be1SHong Zhang duprank += subrank; 759*3f457be1SHong Zhang break; 760*3f457be1SHong Zhang } 761*3f457be1SHong Zhang duprank += subsize[i]; j++; 762*3f457be1SHong Zhang } 763*3f457be1SHong Zhang /* 764*3f457be1SHong Zhang ierr = PetscSynchronizedPrintf(pc->comm, "[%d] color %d, subrank %d, duprank %d\n",rank,color,subrank,duprank); 765*3f457be1SHong Zhang ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr); 766*3f457be1SHong Zhang */ 767*3f457be1SHong Zhang 768*3f457be1SHong Zhang ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr); 769*3f457be1SHong Zhang ierr = MPI_Comm_rank(dupcomm,&rank_dup);CHKERRQ(ierr); 770*3f457be1SHong Zhang ierr = MPI_Comm_size(dupcomm,&size_dup);CHKERRQ(ierr); 771*3f457be1SHong Zhang /* 772*3f457be1SHong Zhang ierr = PetscSynchronizedPrintf(pc->comm, "[%d] duprank %d\n",rank,duprank); 773*3f457be1SHong Zhang ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr); 774*3f457be1SHong Zhang */ 775*3f457be1SHong Zhang red->dupcomm = dupcomm; 776*3f457be1SHong Zhang ierr = PetscFree(subsize);CHKERRQ(ierr); 777*3f457be1SHong Zhang 7784b9ad928SBarry Smith /* create the sequential PC that each processor has copy of */ 779*3f457be1SHong Zhang ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr); 7804b9ad928SBarry Smith ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr); 7814b9ad928SBarry Smith ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 7824b9ad928SBarry Smith ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr); 7834b9ad928SBarry Smith ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr); 7844b9ad928SBarry Smith 7854b9ad928SBarry Smith pc->ops->apply = PCApply_Redundant; 7864b9ad928SBarry Smith pc->ops->applytranspose = 0; 7874b9ad928SBarry Smith pc->ops->setup = PCSetUp_Redundant; 7884b9ad928SBarry Smith pc->ops->destroy = PCDestroy_Redundant; 7894b9ad928SBarry Smith pc->ops->setfromoptions = PCSetFromOptions_Redundant; 7904b9ad928SBarry Smith pc->ops->setuponblocks = 0; 7914b9ad928SBarry Smith pc->ops->view = PCView_Redundant; 7924b9ad928SBarry Smith pc->ops->applyrichardson = 0; 7934b9ad928SBarry Smith 7944b9ad928SBarry Smith pc->data = (void*)red; 7954b9ad928SBarry Smith 7964b9ad928SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant", 7974b9ad928SBarry Smith PCRedundantSetScatter_Redundant);CHKERRQ(ierr); 7984b9ad928SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant", 7994b9ad928SBarry Smith PCRedundantGetPC_Redundant);CHKERRQ(ierr); 8004b9ad928SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant", 8014b9ad928SBarry Smith PCRedundantGetOperators_Redundant);CHKERRQ(ierr); 8024b9ad928SBarry Smith 8034b9ad928SBarry Smith PetscFunctionReturn(0); 8044b9ad928SBarry Smith } 8054b9ad928SBarry Smith EXTERN_C_END 806