1dba47a55SKris Buschelman #define PETSCKSP_DLL 2dba47a55SKris Buschelman 34b9ad928SBarry Smith /* 43f457be1SHong Zhang This file defines a "solve the problem redundantly on each subgroup of processor" preconditioner. 54b9ad928SBarry Smith */ 66356e834SBarry Smith #include "private/pcimpl.h" /*I "petscpc.h" I*/ 74b9ad928SBarry Smith #include "petscksp.h" 84b9ad928SBarry Smith 94b9ad928SBarry Smith typedef struct { 104b9ad928SBarry Smith PC pc; /* actual preconditioner used on each processor */ 113f457be1SHong Zhang Vec xsub,ysub; /* vectors of a subcommunicator to hold parallel vectors of pc->comm */ 123f457be1SHong Zhang Vec xdup,ydup; /* parallel vector that congregates xsub or ysub facilitating vector scattering */ 13b3804887SHong Zhang Mat pmats; /* matrix and optional preconditioner matrix belong to a subcommunicator */ 143f457be1SHong Zhang VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */ 154b9ad928SBarry Smith PetscTruth useparallelmat; 163f457be1SHong Zhang MPI_Comm subcomm; /* processors belong to a subcommunicator implement a PC in parallel */ 173f457be1SHong Zhang MPI_Comm dupcomm; /* processors belong to pc->comm with their rank remapped in the way 183f457be1SHong Zhang that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */ 193f457be1SHong Zhang PetscInt nsubcomm; /* num of subcommunicators, which equals the num of redundant matrix systems */ 203f457be1SHong Zhang PetscInt color; /* color of processors in a subcommunicator */ 214b9ad928SBarry Smith } PC_Redundant; 224b9ad928SBarry Smith 234b9ad928SBarry Smith #undef __FUNCT__ 244b9ad928SBarry Smith #define __FUNCT__ "PCView_Redundant" 256849ba73SBarry Smith static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer) 264b9ad928SBarry Smith { 274b9ad928SBarry Smith PC_Redundant *red = (PC_Redundant*)pc->data; 28dfbe8321SBarry Smith PetscErrorCode ierr; 2913f74950SBarry Smith PetscMPIInt rank; 3032077d6dSBarry Smith PetscTruth iascii,isstring; 314b9ad928SBarry Smith PetscViewer sviewer; 324b9ad928SBarry Smith 334b9ad928SBarry Smith PetscFunctionBegin; 344b9ad928SBarry Smith ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 3532077d6dSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 364b9ad928SBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr); 3732077d6dSBarry Smith if (iascii) { 384b9ad928SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr); 394b9ad928SBarry Smith ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 404b9ad928SBarry Smith if (!rank) { 414b9ad928SBarry Smith ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 424b9ad928SBarry Smith ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 434b9ad928SBarry Smith ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 444b9ad928SBarry Smith } 454b9ad928SBarry Smith ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 464b9ad928SBarry Smith } else if (isstring) { 474b9ad928SBarry Smith ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr); 484b9ad928SBarry Smith ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 494b9ad928SBarry Smith if (!rank) { 504b9ad928SBarry Smith ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 514b9ad928SBarry Smith } 524b9ad928SBarry Smith ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 534b9ad928SBarry Smith } else { 5479a5c55eSBarry Smith SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name); 554b9ad928SBarry Smith } 564b9ad928SBarry Smith PetscFunctionReturn(0); 574b9ad928SBarry Smith } 584b9ad928SBarry Smith 593f457be1SHong Zhang #include "src/mat/matimpl.h" /*I "petscmat.h" I*/ 603f457be1SHong Zhang #include "private/vecimpl.h" 613f457be1SHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 623f457be1SHong Zhang #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 633f457be1SHong Zhang 64b3804887SHong Zhang typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 65b3804887SHong Zhang PetscInt nzlocal,nsends,nrecvs; 66b3804887SHong Zhang PetscInt *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j; 67b3804887SHong Zhang PetscScalar *sbuf_a,**rbuf_a; 68b3804887SHong Zhang PetscErrorCode (*MatDestroy)(Mat); 69b3804887SHong Zhang } Mat_Redundant; 70b3804887SHong Zhang 71b3804887SHong Zhang #undef __FUNCT__ 72b3804887SHong Zhang #define __FUNCT__ "PetscObjectContainerDestroy_MatRedundant" 73b3804887SHong Zhang PetscErrorCode PetscObjectContainerDestroy_MatRedundant(void *ptr) 74b3804887SHong Zhang { 75b3804887SHong Zhang PetscErrorCode ierr; 76b3804887SHong Zhang Mat_Redundant *redund=(Mat_Redundant*)ptr; 77b3804887SHong Zhang PetscInt i; 78b3804887SHong Zhang 79b3804887SHong Zhang PetscFunctionBegin; 80b3804887SHong Zhang ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 81b3804887SHong Zhang ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 82b3804887SHong Zhang ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 83b3804887SHong Zhang for (i=0; i<redund->nrecvs; i++){ 84b3804887SHong Zhang ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 85b3804887SHong Zhang ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 86b3804887SHong Zhang } 87b3804887SHong Zhang ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 88b3804887SHong Zhang ierr = PetscFree(redund);CHKERRQ(ierr); 89b3804887SHong Zhang PetscFunctionReturn(0); 90b3804887SHong Zhang } 91b3804887SHong Zhang 92b3804887SHong Zhang #undef __FUNCT__ 93b3804887SHong Zhang #define __FUNCT__ "MatDestroy_MatRedundant" 94b3804887SHong Zhang PetscErrorCode MatDestroy_MatRedundant(Mat A) 95b3804887SHong Zhang { 96b3804887SHong Zhang PetscErrorCode ierr; 97b3804887SHong Zhang PetscObjectContainer container; 98b3804887SHong Zhang Mat_Redundant *redund=PETSC_NULL; 99b3804887SHong Zhang 100b3804887SHong Zhang PetscFunctionBegin; 101b3804887SHong Zhang ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 102b3804887SHong Zhang if (container) { 103b3804887SHong Zhang ierr = PetscObjectContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 104b3804887SHong Zhang } else { 105b3804887SHong Zhang SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 106b3804887SHong Zhang } 107b3804887SHong Zhang A->ops->destroy = redund->MatDestroy; 108b3804887SHong Zhang ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 109b3804887SHong Zhang ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 110b3804887SHong Zhang ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 111b3804887SHong Zhang PetscFunctionReturn(0); 112b3804887SHong Zhang } 113b3804887SHong Zhang 1143f457be1SHong Zhang #undef __FUNCT__ 1153f457be1SHong Zhang #define __FUNCT__ "MatGetRedundantMatrix" 116f664ae05SHong Zhang PetscErrorCode MatGetRedundantMatrix_AIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 1173f457be1SHong Zhang { 118b3804887SHong Zhang PetscMPIInt rank,size; 1193f457be1SHong Zhang MPI_Comm comm=mat->comm; 1203f457be1SHong Zhang PetscErrorCode ierr; 121b3804887SHong Zhang PetscInt nsends,nrecvs,i,prid=100,rownz_max; 1223f457be1SHong Zhang PetscMPIInt *send_rank,*recv_rank; 123b3804887SHong Zhang PetscInt *rowrange=mat->rmap.range; 1243f457be1SHong Zhang Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 125b3804887SHong Zhang Mat A=aij->A,B=aij->B,C=*matredundant; 1263f457be1SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 1273f457be1SHong Zhang PetscScalar *sbuf_a; 128b3804887SHong Zhang PetscInt nzlocal=a->nz+b->nz; 129b3804887SHong Zhang PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 1303f457be1SHong Zhang PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 131b3804887SHong Zhang PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 1323f457be1SHong Zhang PetscScalar *vals,*aworkA,*aworkB; 1333f457be1SHong Zhang PetscMPIInt tag1,tag2,tag3,imdex; 1343f457be1SHong Zhang MPI_Request *s_waits1,*s_waits2,*s_waits3,*r_waits1,*r_waits2,*r_waits3; 1353f457be1SHong Zhang MPI_Status recv_status,*send_status; 1363f457be1SHong Zhang PetscInt *sbuf_nz,*rbuf_nz,count; 1373f457be1SHong Zhang PetscInt **rbuf_j; 1383f457be1SHong Zhang PetscScalar **rbuf_a; 139b3804887SHong Zhang Mat_Redundant *redund=PETSC_NULL; 140b3804887SHong Zhang PetscObjectContainer container; 1413f457be1SHong Zhang 1423f457be1SHong Zhang PetscFunctionBegin; 1433f457be1SHong Zhang ierr = PetscOptionsGetInt(PETSC_NULL,"-prid",&prid,PETSC_NULL);CHKERRQ(ierr); 1443f457be1SHong Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1453f457be1SHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 146b3804887SHong Zhang 147b3804887SHong Zhang if (reuse == MAT_REUSE_MATRIX) { 148b3804887SHong Zhang ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 149b3804887SHong Zhang if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 150b3804887SHong Zhang ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 151b3804887SHong Zhang if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 152b3804887SHong Zhang ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 153b3804887SHong Zhang if (container) { 154b3804887SHong Zhang ierr = PetscObjectContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 155b3804887SHong Zhang } else { 156b3804887SHong Zhang SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 157b3804887SHong Zhang } 158b3804887SHong Zhang if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 159b3804887SHong Zhang 160b3804887SHong Zhang nsends = redund->nsends; 161b3804887SHong Zhang nrecvs = redund->nrecvs; 162b3804887SHong Zhang send_rank = redund->send_rank; recv_rank = send_rank + size; 163b3804887SHong Zhang sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 164b3804887SHong Zhang sbuf_j = redund->sbuf_j; 165b3804887SHong Zhang sbuf_a = redund->sbuf_a; 166b3804887SHong Zhang rbuf_j = redund->rbuf_j; 167b3804887SHong Zhang rbuf_a = redund->rbuf_a; 168b3804887SHong Zhang } 169b3804887SHong Zhang 170b3804887SHong Zhang if (reuse == MAT_INITIAL_MATRIX){ 171b3804887SHong Zhang PetscMPIInt subrank,subsize; 172b3804887SHong Zhang PetscInt nleftover,np_subcomm; 173b3804887SHong Zhang /* get the destination processors' id send_rank, nsends and nrecvs */ 1743f457be1SHong Zhang ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 1753f457be1SHong Zhang ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 1763f457be1SHong Zhang ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 1773f457be1SHong Zhang recv_rank = send_rank + size; 1783f457be1SHong Zhang np_subcomm = size/nsubcomm; 1793f457be1SHong Zhang nleftover = size - nsubcomm*np_subcomm; 1803f457be1SHong Zhang nsends = 0; nrecvs = 0; 1813f457be1SHong Zhang for (i=0; i<size; i++){ /* i=rank*/ 1823f457be1SHong Zhang if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 1833f457be1SHong Zhang send_rank[nsends] = i; nsends++; 1843f457be1SHong Zhang recv_rank[nrecvs++] = i; 1853f457be1SHong Zhang } 1863f457be1SHong Zhang } 1873f457be1SHong Zhang if (rank >= size - nleftover){/* this proc is a leftover processor */ 1883f457be1SHong Zhang i = size-nleftover-1; 1893f457be1SHong Zhang j = 0; 1903f457be1SHong Zhang while (j < nsubcomm - nleftover){ 1913f457be1SHong Zhang send_rank[nsends++] = i; 1923f457be1SHong Zhang i--; j++; 1933f457be1SHong Zhang } 1943f457be1SHong Zhang } 1953f457be1SHong Zhang 1963f457be1SHong Zhang if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 1973f457be1SHong Zhang for (i=0; i<nleftover; i++){ 1983f457be1SHong Zhang recv_rank[nrecvs++] = size-nleftover+i; 1993f457be1SHong Zhang } 2003f457be1SHong Zhang } 2013f457be1SHong Zhang 202b3804887SHong Zhang /* allocate sbuf_j, sbuf_a */ 203b3804887SHong Zhang i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 204b3804887SHong Zhang ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2053f457be1SHong Zhang ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 206b3804887SHong Zhang } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2073f457be1SHong Zhang 208b3804887SHong Zhang /* copy mat's local entries into the buffers */ 209b3804887SHong Zhang if (reuse == MAT_INITIAL_MATRIX){ 210b3804887SHong Zhang rownz_max = 0; 2113f457be1SHong Zhang rptr = sbuf_j; 2123f457be1SHong Zhang cols = sbuf_j + rend-rstart + 1; 2133f457be1SHong Zhang vals = sbuf_a; 2143f457be1SHong Zhang rptr[0] = 0; 2153f457be1SHong Zhang for (i=0; i<rend-rstart; i++){ 2163f457be1SHong Zhang row = i + rstart; 2173f457be1SHong Zhang nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2183f457be1SHong Zhang ncols = nzA + nzB; 2193f457be1SHong Zhang cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2203f457be1SHong Zhang aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2213f457be1SHong Zhang /* load the column indices for this row into cols */ 2223f457be1SHong Zhang lwrite = 0; 2233f457be1SHong Zhang for (l=0; l<nzB; l++) { 2243f457be1SHong Zhang if ((ctmp = bmap[cworkB[l]]) < cstart){ 2253f457be1SHong Zhang vals[lwrite] = aworkB[l]; 2263f457be1SHong Zhang cols[lwrite++] = ctmp; 2273f457be1SHong Zhang } 2283f457be1SHong Zhang } 2293f457be1SHong Zhang for (l=0; l<nzA; l++){ 2303f457be1SHong Zhang vals[lwrite] = aworkA[l]; 2313f457be1SHong Zhang cols[lwrite++] = cstart + cworkA[l]; 2323f457be1SHong Zhang } 2333f457be1SHong Zhang for (l=0; l<nzB; l++) { 2343f457be1SHong Zhang if ((ctmp = bmap[cworkB[l]]) >= cend){ 2353f457be1SHong Zhang vals[lwrite] = aworkB[l]; 2363f457be1SHong Zhang cols[lwrite++] = ctmp; 2373f457be1SHong Zhang } 2383f457be1SHong Zhang } 2393f457be1SHong Zhang vals += ncols; 2403f457be1SHong Zhang cols += ncols; 2413f457be1SHong Zhang rptr[i+1] = rptr[i] + ncols; 242b3804887SHong Zhang if (rownz_max < ncols) rownz_max = ncols; 2433f457be1SHong Zhang } 2443f457be1SHong Zhang if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 245b3804887SHong Zhang } else { /* only copy matrix values into sbuf_a */ 246b3804887SHong Zhang rptr = sbuf_j; 247b3804887SHong Zhang vals = sbuf_a; 248b3804887SHong Zhang rptr[0] = 0; 249b3804887SHong Zhang for (i=0; i<rend-rstart; i++){ 250b3804887SHong Zhang row = i + rstart; 251b3804887SHong Zhang nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 252b3804887SHong Zhang ncols = nzA + nzB; 253b3804887SHong Zhang cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 254b3804887SHong Zhang aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 255b3804887SHong Zhang lwrite = 0; 256b3804887SHong Zhang for (l=0; l<nzB; l++) { 257b3804887SHong Zhang if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 258b3804887SHong Zhang } 259b3804887SHong Zhang for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 260b3804887SHong Zhang for (l=0; l<nzB; l++) { 261b3804887SHong Zhang if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 262b3804887SHong Zhang } 263b3804887SHong Zhang vals += ncols; 264b3804887SHong Zhang rptr[i+1] = rptr[i] + ncols; 265b3804887SHong Zhang } 266b3804887SHong Zhang } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2673f457be1SHong Zhang 2683f457be1SHong Zhang /* send nzlocal to others, and recv other's nzlocal */ 2693f457be1SHong Zhang /*--------------------------------------------------*/ 270b3804887SHong Zhang if (reuse == MAT_INITIAL_MATRIX){ 271b3804887SHong Zhang ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 272b3804887SHong Zhang s_waits2 = s_waits3 + nsends; 273b3804887SHong Zhang s_waits1 = s_waits2 + nsends; 274b3804887SHong Zhang r_waits1 = s_waits1 + nsends; 2753f457be1SHong Zhang r_waits2 = r_waits1 + nrecvs; 2763f457be1SHong Zhang r_waits3 = r_waits2 + nrecvs; 277b3804887SHong Zhang } else { 278b3804887SHong Zhang ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 279b3804887SHong Zhang r_waits3 = s_waits3 + nsends; 280b3804887SHong Zhang } 2813f457be1SHong Zhang 282b3804887SHong Zhang ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 283b3804887SHong Zhang if (reuse == MAT_INITIAL_MATRIX){ 284b3804887SHong Zhang /* get new tags to keep the communication clean */ 285b3804887SHong Zhang ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 286b3804887SHong Zhang ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 287b3804887SHong Zhang ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 288b3804887SHong Zhang rbuf_nz = sbuf_nz + nsends; 2893f457be1SHong Zhang 2903f457be1SHong Zhang /* post receives of other's nzlocal */ 2913f457be1SHong Zhang for (i=0; i<nrecvs; i++){ 2923f457be1SHong Zhang ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2933f457be1SHong Zhang } 2943f457be1SHong Zhang /* send nzlocal to others */ 2953f457be1SHong Zhang for (i=0; i<nsends; i++){ 2963f457be1SHong Zhang sbuf_nz[i] = nzlocal; 2973f457be1SHong Zhang ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2983f457be1SHong Zhang } 2993f457be1SHong Zhang /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 3003f457be1SHong Zhang count = nrecvs; 3013f457be1SHong Zhang while (count) { 3023f457be1SHong Zhang ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 3033f457be1SHong Zhang recv_rank[imdex] = recv_status.MPI_SOURCE; 304b3804887SHong Zhang /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 3053f457be1SHong Zhang ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 3063f457be1SHong Zhang 307b3804887SHong Zhang i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 308b3804887SHong Zhang rbuf_nz[imdex] += i + 2; 3093f457be1SHong Zhang ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 3103f457be1SHong Zhang ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 3113f457be1SHong Zhang count--; 3123f457be1SHong Zhang } 3133f457be1SHong Zhang /* wait on sends of nzlocal */ 3143f457be1SHong Zhang if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 315b3804887SHong Zhang /* send mat->i,j to others, and recv from other's */ 316b3804887SHong Zhang /*------------------------------------------------*/ 317b3804887SHong Zhang for (i=0; i<nsends; i++){ 318b3804887SHong Zhang j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 319b3804887SHong Zhang ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 320b3804887SHong Zhang } 321b3804887SHong Zhang /* wait on receives of mat->i,j */ 322b3804887SHong Zhang /*------------------------------*/ 323b3804887SHong Zhang count = nrecvs; 324b3804887SHong Zhang while (count) { 325b3804887SHong Zhang ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 326b3804887SHong Zhang if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 327b3804887SHong Zhang count--; 328b3804887SHong Zhang } 329b3804887SHong Zhang /* wait on sends of mat->i,j */ 330b3804887SHong Zhang /*---------------------------*/ 331b3804887SHong Zhang if (nsends) { 332b3804887SHong Zhang ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 333b3804887SHong Zhang } 334b3804887SHong Zhang } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 3353f457be1SHong Zhang 336b3804887SHong Zhang /* post receives, send and receive mat->a */ 337b3804887SHong Zhang /*----------------------------------------*/ 338b3804887SHong Zhang for (imdex=0; imdex<nrecvs; imdex++) { 339b3804887SHong Zhang ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 340b3804887SHong Zhang } 3413f457be1SHong Zhang for (i=0; i<nsends; i++){ 3423f457be1SHong Zhang ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 3433f457be1SHong Zhang } 3443f457be1SHong Zhang count = nrecvs; 3453f457be1SHong Zhang while (count) { 3463f457be1SHong Zhang ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 3473f457be1SHong Zhang if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 3483f457be1SHong Zhang count--; 3493f457be1SHong Zhang } 3503f457be1SHong Zhang if (nsends) { 3513f457be1SHong Zhang ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 3523f457be1SHong Zhang } 353b3804887SHong Zhang 354b3804887SHong Zhang ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 3553f457be1SHong Zhang 3563f457be1SHong Zhang /* create redundant matrix */ 3573f457be1SHong Zhang /*-------------------------*/ 3580ae51fcdSHong Zhang if (reuse == MAT_INITIAL_MATRIX){ 359b3804887SHong Zhang /* compute rownz_max for preallocation */ 360b3804887SHong Zhang for (imdex=0; imdex<nrecvs; imdex++){ 361b3804887SHong Zhang j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 362b3804887SHong Zhang rptr = rbuf_j[imdex]; 363b3804887SHong Zhang for (i=0; i<j; i++){ 364b3804887SHong Zhang ncols = rptr[i+1] - rptr[i]; 365b3804887SHong Zhang if (rownz_max < ncols) rownz_max = ncols; 366b3804887SHong Zhang } 367b3804887SHong Zhang } 368b3804887SHong Zhang 3693f457be1SHong Zhang ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 3703f457be1SHong Zhang ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 3713f457be1SHong Zhang ierr = MatSetFromOptions(C);CHKERRQ(ierr); 372b3804887SHong Zhang ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 373b3804887SHong Zhang ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 3740ae51fcdSHong Zhang } else { 3750ae51fcdSHong Zhang C = *matredundant; 3760ae51fcdSHong Zhang } 377b3804887SHong Zhang 3783f457be1SHong Zhang /* insert local matrix entries */ 3793f457be1SHong Zhang rptr = sbuf_j; 3803f457be1SHong Zhang cols = sbuf_j + rend-rstart + 1; 3813f457be1SHong Zhang vals = sbuf_a; 3823f457be1SHong Zhang for (i=0; i<rend-rstart; i++){ 3833f457be1SHong Zhang row = i + rstart; 3843f457be1SHong Zhang ncols = rptr[i+1] - rptr[i]; 3853f457be1SHong Zhang ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3863f457be1SHong Zhang vals += ncols; 3873f457be1SHong Zhang cols += ncols; 3883f457be1SHong Zhang } 3893f457be1SHong Zhang /* insert received matrix entries */ 3903f457be1SHong Zhang for (imdex=0; imdex<nrecvs; imdex++){ 3913f457be1SHong Zhang rstart = rowrange[recv_rank[imdex]]; 3923f457be1SHong Zhang rend = rowrange[recv_rank[imdex]+1]; 3933f457be1SHong Zhang rptr = rbuf_j[imdex]; 3943f457be1SHong Zhang cols = rbuf_j[imdex] + rend-rstart + 1; 3953f457be1SHong Zhang vals = rbuf_a[imdex]; 3963f457be1SHong Zhang for (i=0; i<rend-rstart; i++){ 3973f457be1SHong Zhang row = i + rstart; 3983f457be1SHong Zhang ncols = rptr[i+1] - rptr[i]; 3993f457be1SHong Zhang ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4003f457be1SHong Zhang vals += ncols; 4013f457be1SHong Zhang cols += ncols; 4023f457be1SHong Zhang } 4033f457be1SHong Zhang } 4043f457be1SHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4053f457be1SHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4063f457be1SHong Zhang ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 4073f457be1SHong Zhang if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 4080ae51fcdSHong Zhang if (reuse == MAT_INITIAL_MATRIX){ 409b3804887SHong Zhang PetscObjectContainer container; 4103f457be1SHong Zhang *matredundant = C; 411b3804887SHong Zhang /* create a supporting struct and attach it to C for reuse */ 412b3804887SHong Zhang ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr); 413b3804887SHong Zhang ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 414b3804887SHong Zhang ierr = PetscObjectContainerSetPointer(container,redund);CHKERRQ(ierr); 415b3804887SHong Zhang ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 416b3804887SHong Zhang ierr = PetscObjectContainerSetUserDestroy(container,PetscObjectContainerDestroy_MatRedundant);CHKERRQ(ierr); 4173f457be1SHong Zhang 418b3804887SHong Zhang redund->nzlocal = nzlocal; 419b3804887SHong Zhang redund->nsends = nsends; 420b3804887SHong Zhang redund->nrecvs = nrecvs; 421b3804887SHong Zhang redund->send_rank = send_rank; 422b3804887SHong Zhang redund->sbuf_nz = sbuf_nz; 423b3804887SHong Zhang redund->sbuf_j = sbuf_j; 424b3804887SHong Zhang redund->sbuf_a = sbuf_a; 425b3804887SHong Zhang redund->rbuf_j = rbuf_j; 426b3804887SHong Zhang redund->rbuf_a = rbuf_a; 427b3804887SHong Zhang 428b3804887SHong Zhang redund->MatDestroy = C->ops->destroy; 429b3804887SHong Zhang C->ops->destroy = MatDestroy_MatRedundant; 4303f457be1SHong Zhang } 4313f457be1SHong Zhang PetscFunctionReturn(0); 4323f457be1SHong Zhang } 4333f457be1SHong Zhang 4344b9ad928SBarry Smith #undef __FUNCT__ 4354b9ad928SBarry Smith #define __FUNCT__ "PCSetUp_Redundant" 4366849ba73SBarry Smith static PetscErrorCode PCSetUp_Redundant(PC pc) 4374b9ad928SBarry Smith { 4384b9ad928SBarry Smith PC_Redundant *red = (PC_Redundant*)pc->data; 439dfbe8321SBarry Smith PetscErrorCode ierr; 4403f457be1SHong Zhang PetscInt mstart,mend,mlocal,m; 44113f74950SBarry Smith PetscMPIInt size; 4424b9ad928SBarry Smith MatReuse reuse = MAT_INITIAL_MATRIX; 4434b9ad928SBarry Smith MatStructure str = DIFFERENT_NONZERO_PATTERN; 4444b9ad928SBarry Smith MPI_Comm comm; 44523ce1328SBarry Smith Vec vec; 4464b9ad928SBarry Smith 4473f457be1SHong Zhang PetscInt mlocal_sub; 4483f457be1SHong Zhang PetscMPIInt subsize,subrank; 4493f457be1SHong Zhang PetscInt rstart_sub,rend_sub,mloc_sub; 4503f457be1SHong Zhang 4514b9ad928SBarry Smith PetscFunctionBegin; 4523f457be1SHong Zhang ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr); 45323ce1328SBarry Smith ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr); 4544b9ad928SBarry Smith ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 45523ce1328SBarry Smith ierr = VecGetSize(vec,&m);CHKERRQ(ierr); 4564b9ad928SBarry Smith if (!pc->setupcalled) { 4573f457be1SHong Zhang /* create working vectors xsub/ysub and xdup/ydup */ 45823ce1328SBarry Smith ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr); 4593f457be1SHong Zhang ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr); 4604b9ad928SBarry Smith 4613f457be1SHong Zhang /* get local size of xsub/ysub */ 4623f457be1SHong Zhang ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr); 4633f457be1SHong Zhang ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr); 4643f457be1SHong Zhang rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */ 4653f457be1SHong Zhang if (subrank+1 < subsize){ 4663f457be1SHong Zhang rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)]; 4673f457be1SHong Zhang } else { 4683f457be1SHong Zhang rend_sub = m; 4693f457be1SHong Zhang } 4703f457be1SHong Zhang mloc_sub = rend_sub - rstart_sub; 4713f457be1SHong Zhang ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr); 4723f457be1SHong Zhang /* create xsub with empty local arrays, because xdup's arrays will be placed into it */ 4733f457be1SHong Zhang ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr); 4743f457be1SHong Zhang 4753f457be1SHong Zhang /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it. 4763f457be1SHong Zhang Note: we use communicator dupcomm, not pc->comm! */ 4773f457be1SHong Zhang ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr); 4783f457be1SHong Zhang ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr); 4793f457be1SHong Zhang 4803f457be1SHong Zhang /* create vec scatters */ 4813f457be1SHong Zhang if (!red->scatterin){ 4823f457be1SHong Zhang IS is1,is2; 4833f457be1SHong Zhang PetscInt *idx1,*idx2,i,j,k; 4843f457be1SHong Zhang ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr); 4853f457be1SHong Zhang idx2 = idx1 + red->nsubcomm*mlocal; 4863f457be1SHong Zhang j = 0; 4873f457be1SHong Zhang for (k=0; k<red->nsubcomm; k++){ 4883f457be1SHong Zhang for (i=mstart; i<mend; i++){ 4893f457be1SHong Zhang idx1[j] = i; 4903f457be1SHong Zhang idx2[j++] = i + m*k; 4913f457be1SHong Zhang } 4923f457be1SHong Zhang } 4933f457be1SHong Zhang ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr); 4943f457be1SHong Zhang ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr); 4953f457be1SHong Zhang ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr); 4963f457be1SHong Zhang ierr = ISDestroy(is1);CHKERRQ(ierr); 4973f457be1SHong Zhang ierr = ISDestroy(is2);CHKERRQ(ierr); 4983f457be1SHong Zhang 4993f457be1SHong Zhang ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr); 5003f457be1SHong Zhang ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr); 5013f457be1SHong Zhang ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr); 5023f457be1SHong Zhang ierr = ISDestroy(is1);CHKERRQ(ierr); 5033f457be1SHong Zhang ierr = ISDestroy(is2);CHKERRQ(ierr); 5043f457be1SHong Zhang ierr = PetscFree(idx1);CHKERRQ(ierr); 5054b9ad928SBarry Smith } 5064b9ad928SBarry Smith } 50723ce1328SBarry Smith ierr = VecDestroy(vec);CHKERRQ(ierr); 5084b9ad928SBarry Smith 5094b9ad928SBarry Smith /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */ 5103f457be1SHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5114b9ad928SBarry Smith if (size == 1) { 5124b9ad928SBarry Smith red->useparallelmat = PETSC_FALSE; 5134b9ad928SBarry Smith } 5144b9ad928SBarry Smith 5154b9ad928SBarry Smith if (red->useparallelmat) { 5164b9ad928SBarry Smith if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) { 5174b9ad928SBarry Smith /* destroy old matrices */ 5184b9ad928SBarry Smith if (red->pmats) { 519b3804887SHong Zhang //ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr); 520b3804887SHong Zhang ierr = MatDestroy(red->pmats);CHKERRQ(ierr); 5214b9ad928SBarry Smith } 5224b9ad928SBarry Smith } else if (pc->setupcalled == 1) { 5234b9ad928SBarry Smith reuse = MAT_REUSE_MATRIX; 5244b9ad928SBarry Smith str = SAME_NONZERO_PATTERN; 5254b9ad928SBarry Smith } 5264b9ad928SBarry Smith 5273f457be1SHong Zhang /* grab the parallel matrix and put it into processors of a subcomminicator */ 528f664ae05SHong Zhang /*--------------------------------------------------------------------------*/ 529f664ae05SHong Zhang ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr); 530b3804887SHong Zhang ierr = MatGetRedundantMatrix_AIJ(pc->pmat,red->nsubcomm,red->subcomm,mlocal_sub,reuse,&red->pmats);CHKERRQ(ierr); 5313f457be1SHong Zhang /* tell PC of the subcommunicator its operators */ 532b3804887SHong Zhang ierr = PCSetOperators(red->pc,red->pmats,red->pmats,str);CHKERRQ(ierr); 5334b9ad928SBarry Smith } else { 5344b9ad928SBarry Smith ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr); 5354b9ad928SBarry Smith } 5364b9ad928SBarry Smith ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 5374b9ad928SBarry Smith ierr = PCSetUp(red->pc);CHKERRQ(ierr); 5384b9ad928SBarry Smith PetscFunctionReturn(0); 5394b9ad928SBarry Smith } 5404b9ad928SBarry Smith 5414b9ad928SBarry Smith #undef __FUNCT__ 5424b9ad928SBarry Smith #define __FUNCT__ "PCApply_Redundant" 5436849ba73SBarry Smith static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y) 5444b9ad928SBarry Smith { 5454b9ad928SBarry Smith PC_Redundant *red = (PC_Redundant*)pc->data; 546dfbe8321SBarry Smith PetscErrorCode ierr; 5473f457be1SHong Zhang PetscScalar *array; 5484b9ad928SBarry Smith 5494b9ad928SBarry Smith PetscFunctionBegin; 5503f457be1SHong Zhang /* scatter x to xdup */ 5513f457be1SHong Zhang ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 5523f457be1SHong Zhang ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 5533f457be1SHong Zhang 5543f457be1SHong Zhang /* place xdup's local array into xsub */ 5553f457be1SHong Zhang ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr); 5563f457be1SHong Zhang ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr); 5574b9ad928SBarry Smith 5584b9ad928SBarry Smith /* apply preconditioner on each processor */ 5593f457be1SHong Zhang ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr); 5603f457be1SHong Zhang ierr = VecResetArray(red->xsub);CHKERRQ(ierr); 5613f457be1SHong Zhang ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr); 5624b9ad928SBarry Smith 5633f457be1SHong Zhang /* place ysub's local array into ydup */ 5643f457be1SHong Zhang ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr); 5653f457be1SHong Zhang ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr); 5663f457be1SHong Zhang 5673f457be1SHong Zhang /* scatter ydup to y */ 5683f457be1SHong Zhang ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 5693f457be1SHong Zhang ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 5703f457be1SHong Zhang ierr = VecResetArray(red->ydup);CHKERRQ(ierr); 5713f457be1SHong Zhang ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr); 5724b9ad928SBarry Smith PetscFunctionReturn(0); 5734b9ad928SBarry Smith } 5744b9ad928SBarry Smith 5754b9ad928SBarry Smith #undef __FUNCT__ 5764b9ad928SBarry Smith #define __FUNCT__ "PCDestroy_Redundant" 5776849ba73SBarry Smith static PetscErrorCode PCDestroy_Redundant(PC pc) 5784b9ad928SBarry Smith { 5794b9ad928SBarry Smith PC_Redundant *red = (PC_Redundant*)pc->data; 580dfbe8321SBarry Smith PetscErrorCode ierr; 5814b9ad928SBarry Smith 5824b9ad928SBarry Smith PetscFunctionBegin; 5834b9ad928SBarry Smith if (red->scatterin) {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);} 5844b9ad928SBarry Smith if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);} 5853f457be1SHong Zhang if (red->ysub) {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);} 5863f457be1SHong Zhang if (red->xsub) {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);} 5873f457be1SHong Zhang if (red->xdup) {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);} 5883f457be1SHong Zhang if (red->ydup) {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);} 589b3804887SHong Zhang if (red->pmats) { 590b3804887SHong Zhang ierr = MatDestroy(red->pmats);CHKERRQ(ierr); 5913f457be1SHong Zhang } 5923f457be1SHong Zhang 593b3804887SHong Zhang 5944b9ad928SBarry Smith ierr = PCDestroy(red->pc);CHKERRQ(ierr); 5954b9ad928SBarry Smith ierr = PetscFree(red);CHKERRQ(ierr); 5964b9ad928SBarry Smith PetscFunctionReturn(0); 5974b9ad928SBarry Smith } 5984b9ad928SBarry Smith 5994b9ad928SBarry Smith #undef __FUNCT__ 6004b9ad928SBarry Smith #define __FUNCT__ "PCSetFromOptions_Redundant" 6016849ba73SBarry Smith static PetscErrorCode PCSetFromOptions_Redundant(PC pc) 6024b9ad928SBarry Smith { 6034b9ad928SBarry Smith PetscFunctionBegin; 6044b9ad928SBarry Smith PetscFunctionReturn(0); 6054b9ad928SBarry Smith } 6064b9ad928SBarry Smith 6074b9ad928SBarry Smith EXTERN_C_BEGIN 6084b9ad928SBarry Smith #undef __FUNCT__ 6094b9ad928SBarry Smith #define __FUNCT__ "PCRedundantSetScatter_Redundant" 610dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out) 6114b9ad928SBarry Smith { 6124b9ad928SBarry Smith PC_Redundant *red = (PC_Redundant*)pc->data; 613dfbe8321SBarry Smith PetscErrorCode ierr; 6144b9ad928SBarry Smith 6154b9ad928SBarry Smith PetscFunctionBegin; 6164b9ad928SBarry Smith red->scatterin = in; 6174b9ad928SBarry Smith red->scatterout = out; 6184b9ad928SBarry Smith ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr); 6194b9ad928SBarry Smith ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr); 6204b9ad928SBarry Smith PetscFunctionReturn(0); 6214b9ad928SBarry Smith } 6224b9ad928SBarry Smith EXTERN_C_END 6234b9ad928SBarry Smith 6244b9ad928SBarry Smith #undef __FUNCT__ 6254b9ad928SBarry Smith #define __FUNCT__ "PCRedundantSetScatter" 6264b9ad928SBarry Smith /*@ 6274b9ad928SBarry Smith PCRedundantSetScatter - Sets the scatter used to copy values into the 6284b9ad928SBarry Smith redundant local solve and the scatter to move them back into the global 6294b9ad928SBarry Smith vector. 6304b9ad928SBarry Smith 6314b9ad928SBarry Smith Collective on PC 6324b9ad928SBarry Smith 6334b9ad928SBarry Smith Input Parameters: 6344b9ad928SBarry Smith + pc - the preconditioner context 6354b9ad928SBarry Smith . in - the scatter to move the values in 6364b9ad928SBarry Smith - out - the scatter to move them out 6374b9ad928SBarry Smith 6384b9ad928SBarry Smith Level: advanced 6394b9ad928SBarry Smith 6404b9ad928SBarry Smith .keywords: PC, redundant solve 6414b9ad928SBarry Smith @*/ 642dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out) 6434b9ad928SBarry Smith { 644dfbe8321SBarry Smith PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter); 6454b9ad928SBarry Smith 6464b9ad928SBarry Smith PetscFunctionBegin; 6474482741eSBarry Smith PetscValidHeaderSpecific(pc,PC_COOKIE,1); 6484482741eSBarry Smith PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2); 6494482741eSBarry Smith PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3); 6504b9ad928SBarry Smith ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr); 6514b9ad928SBarry Smith if (f) { 6524b9ad928SBarry Smith ierr = (*f)(pc,in,out);CHKERRQ(ierr); 6534b9ad928SBarry Smith } 6544b9ad928SBarry Smith PetscFunctionReturn(0); 6554b9ad928SBarry Smith } 6564b9ad928SBarry Smith 6574b9ad928SBarry Smith EXTERN_C_BEGIN 6584b9ad928SBarry Smith #undef __FUNCT__ 6594b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetPC_Redundant" 660dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc) 6614b9ad928SBarry Smith { 6624b9ad928SBarry Smith PC_Redundant *red = (PC_Redundant*)pc->data; 6634b9ad928SBarry Smith 6644b9ad928SBarry Smith PetscFunctionBegin; 6654b9ad928SBarry Smith *innerpc = red->pc; 6664b9ad928SBarry Smith PetscFunctionReturn(0); 6674b9ad928SBarry Smith } 6684b9ad928SBarry Smith EXTERN_C_END 6694b9ad928SBarry Smith 6704b9ad928SBarry Smith #undef __FUNCT__ 6714b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetPC" 6724b9ad928SBarry Smith /*@ 6734b9ad928SBarry Smith PCRedundantGetPC - Gets the sequential PC created by the redundant PC. 6744b9ad928SBarry Smith 6754b9ad928SBarry Smith Not Collective 6764b9ad928SBarry Smith 6774b9ad928SBarry Smith Input Parameter: 6784b9ad928SBarry Smith . pc - the preconditioner context 6794b9ad928SBarry Smith 6804b9ad928SBarry Smith Output Parameter: 6814b9ad928SBarry Smith . innerpc - the sequential PC 6824b9ad928SBarry Smith 6834b9ad928SBarry Smith Level: advanced 6844b9ad928SBarry Smith 6854b9ad928SBarry Smith .keywords: PC, redundant solve 6864b9ad928SBarry Smith @*/ 687dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc) 6884b9ad928SBarry Smith { 689dfbe8321SBarry Smith PetscErrorCode ierr,(*f)(PC,PC*); 6904b9ad928SBarry Smith 6914b9ad928SBarry Smith PetscFunctionBegin; 6924482741eSBarry Smith PetscValidHeaderSpecific(pc,PC_COOKIE,1); 6934482741eSBarry Smith PetscValidPointer(innerpc,2); 6944b9ad928SBarry Smith ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr); 6954b9ad928SBarry Smith if (f) { 6964b9ad928SBarry Smith ierr = (*f)(pc,innerpc);CHKERRQ(ierr); 6974b9ad928SBarry Smith } 6984b9ad928SBarry Smith PetscFunctionReturn(0); 6994b9ad928SBarry Smith } 7004b9ad928SBarry Smith 7014b9ad928SBarry Smith EXTERN_C_BEGIN 7024b9ad928SBarry Smith #undef __FUNCT__ 7034b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetOperators_Redundant" 704dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat) 7054b9ad928SBarry Smith { 7064b9ad928SBarry Smith PC_Redundant *red = (PC_Redundant*)pc->data; 7074b9ad928SBarry Smith 7084b9ad928SBarry Smith PetscFunctionBegin; 709b3804887SHong Zhang if (mat) *mat = red->pmats; 710b3804887SHong Zhang if (pmat) *pmat = red->pmats; 7114b9ad928SBarry Smith PetscFunctionReturn(0); 7124b9ad928SBarry Smith } 7134b9ad928SBarry Smith EXTERN_C_END 7144b9ad928SBarry Smith 7154b9ad928SBarry Smith #undef __FUNCT__ 7164b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetOperators" 7174b9ad928SBarry Smith /*@ 7184b9ad928SBarry Smith PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix 7194b9ad928SBarry Smith 7204b9ad928SBarry Smith Not Collective 7214b9ad928SBarry Smith 7224b9ad928SBarry Smith Input Parameter: 7234b9ad928SBarry Smith . pc - the preconditioner context 7244b9ad928SBarry Smith 7254b9ad928SBarry Smith Output Parameters: 7264b9ad928SBarry Smith + mat - the matrix 7274b9ad928SBarry Smith - pmat - the (possibly different) preconditioner matrix 7284b9ad928SBarry Smith 7294b9ad928SBarry Smith Level: advanced 7304b9ad928SBarry Smith 7314b9ad928SBarry Smith .keywords: PC, redundant solve 7324b9ad928SBarry Smith @*/ 733dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat) 7344b9ad928SBarry Smith { 735dfbe8321SBarry Smith PetscErrorCode ierr,(*f)(PC,Mat*,Mat*); 7364b9ad928SBarry Smith 7374b9ad928SBarry Smith PetscFunctionBegin; 7384482741eSBarry Smith PetscValidHeaderSpecific(pc,PC_COOKIE,1); 7394482741eSBarry Smith if (mat) PetscValidPointer(mat,2); 7404482741eSBarry Smith if (pmat) PetscValidPointer(pmat,3); 7414b9ad928SBarry Smith ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr); 7424b9ad928SBarry Smith if (f) { 7434b9ad928SBarry Smith ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr); 7444b9ad928SBarry Smith } 7454b9ad928SBarry Smith PetscFunctionReturn(0); 7464b9ad928SBarry Smith } 7474b9ad928SBarry Smith 7484b9ad928SBarry Smith /* -------------------------------------------------------------------------------------*/ 74937a17b4dSBarry Smith /*MC 75037a17b4dSBarry Smith PCREDUNDANT - Runs a preconditioner for the entire problem on each processor 75137a17b4dSBarry Smith 75237a17b4dSBarry Smith 75337a17b4dSBarry Smith Options for the redundant preconditioners can be set with -redundant_pc_xxx 75437a17b4dSBarry Smith 75537a17b4dSBarry Smith Level: intermediate 75637a17b4dSBarry Smith 75737a17b4dSBarry Smith 75837a17b4dSBarry Smith .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(), 75937a17b4dSBarry Smith PCRedundantGetPC(), PCRedundantGetOperators() 76037a17b4dSBarry Smith 76137a17b4dSBarry Smith M*/ 76237a17b4dSBarry Smith 7634b9ad928SBarry Smith EXTERN_C_BEGIN 7644b9ad928SBarry Smith #undef __FUNCT__ 7654b9ad928SBarry Smith #define __FUNCT__ "PCCreate_Redundant" 766dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc) 7674b9ad928SBarry Smith { 768dfbe8321SBarry Smith PetscErrorCode ierr; 7694b9ad928SBarry Smith PC_Redundant *red; 770e060cb09SBarry Smith const char *prefix; 7713f457be1SHong Zhang PetscInt nsubcomm,np_subcomm,nleftover,i,j,color; 772*f6341431SHong Zhang PetscMPIInt rank,size,subrank,*subsize,duprank; 773*f6341431SHong Zhang MPI_Comm subcomm=0,dupcomm=0; 7743f457be1SHong Zhang 7754b9ad928SBarry Smith PetscFunctionBegin; 7764b9ad928SBarry Smith ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr); 77752e6d16bSBarry Smith ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr); 7784b9ad928SBarry Smith red->useparallelmat = PETSC_TRUE; 7794b9ad928SBarry Smith 7803f457be1SHong Zhang ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 7813f457be1SHong Zhang ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr); 7823f457be1SHong Zhang nsubcomm = size; 7833f457be1SHong Zhang ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr); 7843f457be1SHong Zhang if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size); 7853f457be1SHong Zhang 786*f6341431SHong Zhang /*-------------------------------------------------------------------------------------------------- 787*f6341431SHong Zhang To avoid data scattering from subcomm back to original comm, we create subcomm by iteratively taking a 788*f6341431SHong Zhang processe into a subcomm. 789*f6341431SHong Zhang An example: size=4, nsubcomm=3 790*f6341431SHong Zhang pc->comm: 791*f6341431SHong Zhang rank: [0] [1] [2] [3] 792*f6341431SHong Zhang color: 0 1 2 0 793*f6341431SHong Zhang 794*f6341431SHong Zhang subcomm: 795*f6341431SHong Zhang subrank: [0] [0] [0] [1] 796*f6341431SHong Zhang 797*f6341431SHong Zhang dupcomm: 798*f6341431SHong Zhang duprank: [0] [2] [3] [1] 799*f6341431SHong Zhang 800*f6341431SHong Zhang Here, subcomm[color = 0] has subsize=2, owns process [0] and [3] 801*f6341431SHong Zhang subcomm[color = 1] has subsize=1, owns process [1] 802*f6341431SHong Zhang subcomm[color = 2] has subsize=1, owns process [2] 803*f6341431SHong Zhang dupcomm has same number of processes as pc->comm, and its duprank maps 804*f6341431SHong Zhang processes in subcomm contiguously into a 1d array: 805*f6341431SHong Zhang duprank: [0] [1] [2] [3] 806*f6341431SHong Zhang rank: [0] [3] [1] [2] 807*f6341431SHong Zhang subcomm[0] subcomm[1] subcomm[2] 808*f6341431SHong Zhang ----------------------------------------------------------------------------------------*/ 809*f6341431SHong Zhang 8103f457be1SHong Zhang /* get size of each subcommunicators */ 8113f457be1SHong Zhang ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr); 8123f457be1SHong Zhang np_subcomm = size/nsubcomm; 8133f457be1SHong Zhang nleftover = size - nsubcomm*np_subcomm; 8143f457be1SHong Zhang for (i=0; i<nsubcomm; i++){ 8153f457be1SHong Zhang subsize[i] = np_subcomm; 8163f457be1SHong Zhang if (i<nleftover) subsize[i]++; 8173f457be1SHong Zhang } 8183f457be1SHong Zhang 8193f457be1SHong Zhang /* find color for this proc */ 8203f457be1SHong Zhang color = rank%nsubcomm; 8213f457be1SHong Zhang subrank = rank/nsubcomm; 8223f457be1SHong Zhang 8233f457be1SHong Zhang ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr); 8243f457be1SHong Zhang red->subcomm = subcomm; 8253f457be1SHong Zhang red->color = color; 8263f457be1SHong Zhang red->nsubcomm = nsubcomm; 8273f457be1SHong Zhang 8283f457be1SHong Zhang j = 0; duprank = 0; 8293f457be1SHong Zhang for (i=0; i<nsubcomm; i++){ 8303f457be1SHong Zhang if (j == color){ 8313f457be1SHong Zhang duprank += subrank; 8323f457be1SHong Zhang break; 8333f457be1SHong Zhang } 8343f457be1SHong Zhang duprank += subsize[i]; j++; 8353f457be1SHong Zhang } 8363f457be1SHong Zhang 837*f6341431SHong Zhang /* create dupcomm with same size as comm, but its rank, duprank, maps subcomm's contiguously into dupcomm */ 8383f457be1SHong Zhang ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr); 8393f457be1SHong Zhang red->dupcomm = dupcomm; 8403f457be1SHong Zhang ierr = PetscFree(subsize);CHKERRQ(ierr); 841*f6341431SHong Zhang /* if (rank == 0) printf("[%d] subrank %d, duprank: %d\n",rank,subrank,duprank); */ 8423f457be1SHong Zhang 8434b9ad928SBarry Smith /* create the sequential PC that each processor has copy of */ 8443f457be1SHong Zhang ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr); 8454b9ad928SBarry Smith ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr); 8464b9ad928SBarry Smith ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 8474b9ad928SBarry Smith ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr); 8484b9ad928SBarry Smith ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr); 8494b9ad928SBarry Smith 8504b9ad928SBarry Smith pc->ops->apply = PCApply_Redundant; 8514b9ad928SBarry Smith pc->ops->applytranspose = 0; 8524b9ad928SBarry Smith pc->ops->setup = PCSetUp_Redundant; 8534b9ad928SBarry Smith pc->ops->destroy = PCDestroy_Redundant; 8544b9ad928SBarry Smith pc->ops->setfromoptions = PCSetFromOptions_Redundant; 8554b9ad928SBarry Smith pc->ops->setuponblocks = 0; 8564b9ad928SBarry Smith pc->ops->view = PCView_Redundant; 8574b9ad928SBarry Smith pc->ops->applyrichardson = 0; 8584b9ad928SBarry Smith 8594b9ad928SBarry Smith pc->data = (void*)red; 8604b9ad928SBarry Smith 8614b9ad928SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant", 8624b9ad928SBarry Smith PCRedundantSetScatter_Redundant);CHKERRQ(ierr); 8634b9ad928SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant", 8644b9ad928SBarry Smith PCRedundantGetPC_Redundant);CHKERRQ(ierr); 8654b9ad928SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant", 8664b9ad928SBarry Smith PCRedundantGetOperators_Redundant);CHKERRQ(ierr); 8674b9ad928SBarry Smith 8684b9ad928SBarry Smith PetscFunctionReturn(0); 8694b9ad928SBarry Smith } 8704b9ad928SBarry Smith EXTERN_C_END 871