1 #define PETSCKSP_DLL 2 3 /* 4 This file defines a "solve the problem redundantly on each subgroup of processor" preconditioner. 5 */ 6 #include "private/pcimpl.h" /*I "petscpc.h" I*/ 7 #include "petscksp.h" 8 9 typedef struct { 10 PC pc; /* actual preconditioner used on each processor */ 11 Vec xsub,ysub; /* vectors of a subcommunicator to hold parallel vectors of pc->comm */ 12 Vec xdup,ydup; /* parallel vector that congregates xsub or ysub facilitating vector scattering */ 13 Mat *pmats; /* matrix and optional preconditioner matrix */ 14 Mat pmats_sub; /* matrix and optional preconditioner matrix belong to a subcommunicator */ 15 VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */ 16 PetscTruth useparallelmat; 17 MPI_Comm subcomm; /* processors belong to a subcommunicator implement a PC in parallel */ 18 MPI_Comm dupcomm; /* processors belong to pc->comm with their rank remapped in the way 19 that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */ 20 PetscInt nsubcomm; /* num of subcommunicators, which equals the num of redundant matrix systems */ 21 PetscInt color; /* color of processors in a subcommunicator */ 22 } PC_Redundant; 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "PCView_Redundant" 26 static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer) 27 { 28 PC_Redundant *red = (PC_Redundant*)pc->data; 29 PetscErrorCode ierr; 30 PetscMPIInt rank; 31 PetscTruth iascii,isstring; 32 PetscViewer sviewer; 33 34 PetscFunctionBegin; 35 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 36 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 37 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr); 38 if (iascii) { 39 ierr = PetscViewerASCIIPrintf(viewer," Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr); 40 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 41 if (!rank) { 42 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 43 ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 44 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 45 } 46 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 47 } else if (isstring) { 48 ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr); 49 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 50 if (!rank) { 51 ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 52 } 53 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 54 } else { 55 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name); 56 } 57 PetscFunctionReturn(0); 58 } 59 60 #include "src/mat/matimpl.h" /*I "petscmat.h" I*/ 61 #include "private/vecimpl.h" 62 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 63 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 64 65 #undef __FUNCT__ 66 #define __FUNCT__ "MatGetRedundantMatrix" 67 PetscErrorCode MatGetRedundantMatrix_AIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 68 { 69 PetscMPIInt rank,size,subrank,subsize; 70 MPI_Comm comm=mat->comm; 71 PetscErrorCode ierr; 72 PetscInt nsends,nrecvs,i,prid=100,itmp; 73 PetscMPIInt *send_rank,*recv_rank; 74 PetscInt *rowrange=mat->rmap.range,nzlocal; 75 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 76 Mat A=aij->A,B=aij->B; 77 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 78 Mat C=*matredundant; 79 80 PetscInt nleftover,np_subcomm,j; 81 PetscInt nz_A,nz_B,*sbuf_j; 82 PetscScalar *sbuf_a; 83 PetscInt cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 84 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 85 PetscInt *cols,ctmp,lwrite,*rptr,l; 86 PetscScalar *vals,*aworkA,*aworkB; 87 PetscMPIInt tag1,tag2,tag3,imdex; 88 MPI_Request *s_waits1,*s_waits2,*s_waits3,*r_waits1,*r_waits2,*r_waits3; 89 MPI_Status recv_status,*send_status; 90 PetscInt *sbuf_nz,*rbuf_nz,count; 91 PetscInt **rbuf_j; 92 PetscScalar **rbuf_a; 93 94 PetscFunctionBegin; 95 if (reuse == MAT_REUSE_MATRIX) { 96 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 97 if (M != N || M != mat->rmap.N) { 98 SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong global size"); 99 } 100 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 101 if (M != N || M != mlocal_sub) { 102 SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong local size"); 103 } 104 } 105 106 ierr = PetscOptionsGetInt(PETSC_NULL,"-prid",&prid,PETSC_NULL);CHKERRQ(ierr); 107 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 108 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 109 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 110 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 111 /* 112 ierr = PetscSynchronizedPrintf(comm, "[%d] subrank %d, subsize %d\n",rank,subrank,subsize); 113 ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 114 */ 115 /* get the destination processors */ 116 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 117 recv_rank = send_rank + size; 118 np_subcomm = size/nsubcomm; 119 nleftover = size - nsubcomm*np_subcomm; 120 nsends = 0; nrecvs = 0; 121 for (i=0; i<size; i++){ /* i=rank*/ 122 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 123 send_rank[nsends] = i; nsends++; 124 recv_rank[nrecvs++] = i; 125 } 126 } 127 if (rank >= size - nleftover){/* this proc is a leftover processor */ 128 i = size-nleftover-1; 129 j = 0; 130 while (j < nsubcomm - nleftover){ 131 send_rank[nsends++] = i; 132 i--; j++; 133 } 134 } 135 136 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 137 for (i=0; i<nleftover; i++){ 138 recv_rank[nrecvs++] = size-nleftover+i; 139 } 140 } 141 if (prid == rank){ 142 printf("[%d] sends to ",rank); 143 for (i=0; i<nsends; i++) printf(" [%d],",send_rank[i]); 144 printf(" \n"); 145 } 146 /* 147 ierr = PetscSynchronizedPrintf(comm, "[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs); 148 ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 149 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 150 */ 151 152 /* get this processor's nzlocal=nz_A+nz_B */ 153 nz_A = a->nz; nz_B = b->nz; 154 nzlocal = nz_A + nz_B; 155 156 /* allocate sbuf_j, sbuf_a, then copy mat's local entries into the buffers */ 157 itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 158 ierr = PetscMalloc(itmp*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 159 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 160 161 rptr = sbuf_j; 162 cols = sbuf_j + rend-rstart + 1; 163 vals = sbuf_a; 164 rptr[0] = 0; 165 for (i=0; i<rend-rstart; i++){ 166 row = i + rstart; 167 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 168 ncols = nzA + nzB; 169 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 170 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 171 /* load the column indices for this row into cols */ 172 lwrite = 0; 173 for (l=0; l<nzB; l++) { 174 if ((ctmp = bmap[cworkB[l]]) < cstart){ 175 vals[lwrite] = aworkB[l]; 176 cols[lwrite++] = ctmp; 177 } 178 } 179 for (l=0; l<nzA; l++){ 180 vals[lwrite] = aworkA[l]; 181 cols[lwrite++] = cstart + cworkA[l]; 182 } 183 for (l=0; l<nzB; l++) { 184 if ((ctmp = bmap[cworkB[l]]) >= cend){ 185 vals[lwrite] = aworkB[l]; 186 cols[lwrite++] = ctmp; 187 } 188 } 189 /* insert local matrix values into C */ 190 /* ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); */ 191 192 vals += ncols; 193 cols += ncols; 194 rptr[i+1] = rptr[i] + ncols; 195 } 196 /* 197 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 198 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 199 */ 200 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 201 202 /* send nzlocal to others, and recv other's nzlocal */ 203 /*--------------------------------------------------*/ 204 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 205 rbuf_nz = sbuf_nz + nsends; 206 207 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits1,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 208 s_waits2 = s_waits1 + nsends; 209 s_waits3 = s_waits2 + nsends; 210 r_waits1 = s_waits3 + nsends; 211 r_waits2 = r_waits1 + nrecvs; 212 r_waits3 = r_waits2 + nrecvs; 213 214 /* get some new tags to keep the communication clean */ 215 ierr = PetscObjectGetNewTag((PetscObject)A,&tag1);CHKERRQ(ierr); 216 ierr = PetscObjectGetNewTag((PetscObject)A,&tag2);CHKERRQ(ierr); 217 ierr = PetscObjectGetNewTag((PetscObject)A,&tag3);CHKERRQ(ierr); 218 219 /* post receives of other's nzlocal */ 220 for (i=0; i<nrecvs; i++){ 221 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 222 } 223 224 /* send nzlocal to others */ 225 for (i=0; i<nsends; i++){ 226 sbuf_nz[i] = nzlocal; 227 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 228 if (prid == rank) printf(" [%d] sends nz %d to [%d]\n",rank,nzlocal,send_rank[i]); 229 } 230 231 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 232 count = nrecvs; 233 while (count) { 234 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 235 recv_rank[imdex] = recv_status.MPI_SOURCE; 236 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_a and rbuf_j */ 237 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 238 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_status.MPI_SOURCE,tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 239 240 itmp = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 241 rbuf_nz[imdex] += itmp+2; 242 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 243 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 244 count--; 245 } 246 247 /* wait on sends of nzlocal */ 248 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 249 250 /* send mat->i,j and mat->a to others, and recv from other's */ 251 /*-----------------------------------------------------------*/ 252 for (i=0; i<nsends; i++){ 253 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 254 itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 255 ierr = MPI_Isend(sbuf_j,itmp,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 256 } 257 258 /* wait on receives of mat->i,j and mat->a */ 259 /*-----------------------------------------*/ 260 count = nrecvs; 261 while (count) { 262 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 263 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 264 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 265 count--; 266 } 267 268 /* wait on sends of mat->i,j and mat->a */ 269 /*--------------------------------------*/ 270 if (nsends) { 271 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 272 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 273 } 274 ierr = PetscFree(sbuf_nz);CHKERRQ(ierr); 275 ierr = PetscFree2(s_waits1,send_status);CHKERRQ(ierr); 276 277 /* create redundant matrix */ 278 /*-------------------------*/ 279 if (reuse == MAT_INITIAL_MATRIX){ 280 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 281 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 282 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 283 } else { 284 C = *matredundant; 285 } 286 /* insert local matrix entries */ 287 rptr = sbuf_j; 288 cols = sbuf_j + rend-rstart + 1; 289 vals = sbuf_a; 290 for (i=0; i<rend-rstart; i++){ 291 row = i + rstart; 292 ncols = rptr[i+1] - rptr[i]; 293 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 294 vals += ncols; 295 cols += ncols; 296 } 297 /* insert received matrix entries */ 298 for (imdex=0; imdex<nrecvs; imdex++){ 299 300 rstart = rowrange[recv_rank[imdex]]; 301 rend = rowrange[recv_rank[imdex]+1]; 302 rptr = rbuf_j[imdex]; 303 cols = rbuf_j[imdex] + rend-rstart + 1; 304 vals = rbuf_a[imdex]; 305 for (i=0; i<rend-rstart; i++){ 306 row = i + rstart; 307 ncols = rptr[i+1] - rptr[i]; 308 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 309 vals += ncols; 310 cols += ncols; 311 } 312 } 313 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 314 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 315 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 316 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 317 if (reuse == MAT_INITIAL_MATRIX){ 318 *matredundant = C; 319 } 320 321 /* free space */ 322 ierr = PetscFree(send_rank);CHKERRQ(ierr); 323 ierr = PetscFree(sbuf_j);CHKERRQ(ierr); 324 ierr = PetscFree(sbuf_a);CHKERRQ(ierr); 325 for (i=0; i<nrecvs; i++){ 326 ierr = PetscFree(rbuf_j[i]);CHKERRQ(ierr); 327 ierr = PetscFree(rbuf_a[i]);CHKERRQ(ierr); 328 } 329 ierr = PetscFree3(sbuf_nz,rbuf_j,rbuf_a);CHKERRQ(ierr); 330 PetscFunctionReturn(0); 331 } 332 333 #undef __FUNCT__ 334 #define __FUNCT__ "PCSetUp_Redundant" 335 static PetscErrorCode PCSetUp_Redundant(PC pc) 336 { 337 PC_Redundant *red = (PC_Redundant*)pc->data; 338 PetscErrorCode ierr; 339 PetscInt mstart,mend,mlocal,m; 340 PetscMPIInt size; 341 MatReuse reuse = MAT_INITIAL_MATRIX; 342 MatStructure str = DIFFERENT_NONZERO_PATTERN; 343 MPI_Comm comm; 344 Vec vec; 345 346 PetscInt mlocal_sub; 347 PetscMPIInt subsize,subrank; 348 PetscInt rstart_sub,rend_sub,mloc_sub; 349 350 PetscFunctionBegin; 351 ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr); 352 ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr); 353 ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 354 ierr = VecGetSize(vec,&m);CHKERRQ(ierr); 355 if (!pc->setupcalled) { 356 /* create working vectors xsub/ysub and xdup/ydup */ 357 ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr); 358 ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr); 359 360 /* get local size of xsub/ysub */ 361 ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr); 362 ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr); 363 rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */ 364 if (subrank+1 < subsize){ 365 rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)]; 366 } else { 367 rend_sub = m; 368 } 369 mloc_sub = rend_sub - rstart_sub; 370 ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr); 371 /* create xsub with empty local arrays, because xdup's arrays will be placed into it */ 372 ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr); 373 374 /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it. 375 Note: we use communicator dupcomm, not pc->comm! */ 376 ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr); 377 ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr); 378 379 /* create vec scatters */ 380 if (!red->scatterin){ 381 IS is1,is2; 382 PetscInt *idx1,*idx2,i,j,k; 383 ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr); 384 idx2 = idx1 + red->nsubcomm*mlocal; 385 j = 0; 386 for (k=0; k<red->nsubcomm; k++){ 387 for (i=mstart; i<mend; i++){ 388 idx1[j] = i; 389 idx2[j++] = i + m*k; 390 } 391 } 392 ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr); 393 ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr); 394 ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr); 395 ierr = ISDestroy(is1);CHKERRQ(ierr); 396 ierr = ISDestroy(is2);CHKERRQ(ierr); 397 398 ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr); 399 ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr); 400 ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr); 401 ierr = ISDestroy(is1);CHKERRQ(ierr); 402 ierr = ISDestroy(is2);CHKERRQ(ierr); 403 ierr = PetscFree(idx1);CHKERRQ(ierr); 404 } 405 } 406 ierr = VecDestroy(vec);CHKERRQ(ierr); 407 408 /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */ 409 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 410 if (size == 1) { 411 red->useparallelmat = PETSC_FALSE; 412 } 413 414 if (red->useparallelmat) { 415 if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) { 416 /* destroy old matrices */ 417 if (red->pmats) { 418 ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr); 419 } 420 } else if (pc->setupcalled == 1) { 421 reuse = MAT_REUSE_MATRIX; 422 str = SAME_NONZERO_PATTERN; 423 } 424 425 /* grab the parallel matrix and put it into processors of a subcomminicator */ 426 /*--------------------------------------------------------------------------*/ 427 ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr); 428 ierr = MatGetRedundantMatrix_AIJ(pc->pmat,red->nsubcomm,red->subcomm,mlocal_sub,reuse,&red->pmats_sub);CHKERRQ(ierr); 429 /* tell PC of the subcommunicator its operators */ 430 ierr = PCSetOperators(red->pc,red->pmats_sub,red->pmats_sub,str);CHKERRQ(ierr); 431 } else { 432 ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr); 433 } 434 ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 435 ierr = PCSetUp(red->pc);CHKERRQ(ierr); 436 PetscFunctionReturn(0); 437 } 438 439 #undef __FUNCT__ 440 #define __FUNCT__ "PCApply_Redundant" 441 static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y) 442 { 443 PC_Redundant *red = (PC_Redundant*)pc->data; 444 PetscErrorCode ierr; 445 PetscScalar *array; 446 447 PetscFunctionBegin; 448 /* scatter x to xdup */ 449 ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 450 ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 451 452 /* place xdup's local array into xsub */ 453 ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr); 454 ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr); 455 456 /* apply preconditioner on each processor */ 457 ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr); 458 ierr = VecResetArray(red->xsub);CHKERRQ(ierr); 459 ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr); 460 461 /* place ysub's local array into ydup */ 462 ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr); 463 ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr); 464 465 /* scatter ydup to y */ 466 ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 467 ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 468 ierr = VecResetArray(red->ydup);CHKERRQ(ierr); 469 ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr); 470 PetscFunctionReturn(0); 471 } 472 473 #undef __FUNCT__ 474 #define __FUNCT__ "PCDestroy_Redundant" 475 static PetscErrorCode PCDestroy_Redundant(PC pc) 476 { 477 PC_Redundant *red = (PC_Redundant*)pc->data; 478 PetscErrorCode ierr; 479 480 PetscFunctionBegin; 481 if (red->scatterin) {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);} 482 if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);} 483 if (red->ysub) {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);} 484 if (red->xsub) {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);} 485 if (red->xdup) {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);} 486 if (red->ydup) {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);} 487 if (red->pmats_sub) { 488 ierr = MatDestroy(red->pmats_sub);CHKERRQ(ierr); 489 } 490 491 if (red->pmats) { 492 ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr); 493 } 494 ierr = PCDestroy(red->pc);CHKERRQ(ierr); 495 ierr = PetscFree(red);CHKERRQ(ierr); 496 PetscFunctionReturn(0); 497 } 498 499 #undef __FUNCT__ 500 #define __FUNCT__ "PCSetFromOptions_Redundant" 501 static PetscErrorCode PCSetFromOptions_Redundant(PC pc) 502 { 503 PetscFunctionBegin; 504 PetscFunctionReturn(0); 505 } 506 507 EXTERN_C_BEGIN 508 #undef __FUNCT__ 509 #define __FUNCT__ "PCRedundantSetScatter_Redundant" 510 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out) 511 { 512 PC_Redundant *red = (PC_Redundant*)pc->data; 513 PetscErrorCode ierr; 514 515 PetscFunctionBegin; 516 red->scatterin = in; 517 red->scatterout = out; 518 ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr); 519 ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr); 520 PetscFunctionReturn(0); 521 } 522 EXTERN_C_END 523 524 #undef __FUNCT__ 525 #define __FUNCT__ "PCRedundantSetScatter" 526 /*@ 527 PCRedundantSetScatter - Sets the scatter used to copy values into the 528 redundant local solve and the scatter to move them back into the global 529 vector. 530 531 Collective on PC 532 533 Input Parameters: 534 + pc - the preconditioner context 535 . in - the scatter to move the values in 536 - out - the scatter to move them out 537 538 Level: advanced 539 540 .keywords: PC, redundant solve 541 @*/ 542 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out) 543 { 544 PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter); 545 546 PetscFunctionBegin; 547 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 548 PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2); 549 PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3); 550 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr); 551 if (f) { 552 ierr = (*f)(pc,in,out);CHKERRQ(ierr); 553 } 554 PetscFunctionReturn(0); 555 } 556 557 EXTERN_C_BEGIN 558 #undef __FUNCT__ 559 #define __FUNCT__ "PCRedundantGetPC_Redundant" 560 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc) 561 { 562 PC_Redundant *red = (PC_Redundant*)pc->data; 563 564 PetscFunctionBegin; 565 *innerpc = red->pc; 566 PetscFunctionReturn(0); 567 } 568 EXTERN_C_END 569 570 #undef __FUNCT__ 571 #define __FUNCT__ "PCRedundantGetPC" 572 /*@ 573 PCRedundantGetPC - Gets the sequential PC created by the redundant PC. 574 575 Not Collective 576 577 Input Parameter: 578 . pc - the preconditioner context 579 580 Output Parameter: 581 . innerpc - the sequential PC 582 583 Level: advanced 584 585 .keywords: PC, redundant solve 586 @*/ 587 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc) 588 { 589 PetscErrorCode ierr,(*f)(PC,PC*); 590 591 PetscFunctionBegin; 592 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 593 PetscValidPointer(innerpc,2); 594 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr); 595 if (f) { 596 ierr = (*f)(pc,innerpc);CHKERRQ(ierr); 597 } 598 PetscFunctionReturn(0); 599 } 600 601 EXTERN_C_BEGIN 602 #undef __FUNCT__ 603 #define __FUNCT__ "PCRedundantGetOperators_Redundant" 604 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat) 605 { 606 PC_Redundant *red = (PC_Redundant*)pc->data; 607 608 PetscFunctionBegin; 609 if (mat) *mat = red->pmats[0]; 610 if (pmat) *pmat = red->pmats[0]; 611 PetscFunctionReturn(0); 612 } 613 EXTERN_C_END 614 615 #undef __FUNCT__ 616 #define __FUNCT__ "PCRedundantGetOperators" 617 /*@ 618 PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix 619 620 Not Collective 621 622 Input Parameter: 623 . pc - the preconditioner context 624 625 Output Parameters: 626 + mat - the matrix 627 - pmat - the (possibly different) preconditioner matrix 628 629 Level: advanced 630 631 .keywords: PC, redundant solve 632 @*/ 633 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat) 634 { 635 PetscErrorCode ierr,(*f)(PC,Mat*,Mat*); 636 637 PetscFunctionBegin; 638 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 639 if (mat) PetscValidPointer(mat,2); 640 if (pmat) PetscValidPointer(pmat,3); 641 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr); 642 if (f) { 643 ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr); 644 } 645 PetscFunctionReturn(0); 646 } 647 648 /* -------------------------------------------------------------------------------------*/ 649 /*MC 650 PCREDUNDANT - Runs a preconditioner for the entire problem on each processor 651 652 653 Options for the redundant preconditioners can be set with -redundant_pc_xxx 654 655 Level: intermediate 656 657 658 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(), 659 PCRedundantGetPC(), PCRedundantGetOperators() 660 661 M*/ 662 663 EXTERN_C_BEGIN 664 #undef __FUNCT__ 665 #define __FUNCT__ "PCCreate_Redundant" 666 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc) 667 { 668 PetscErrorCode ierr; 669 PC_Redundant *red; 670 const char *prefix; 671 672 PetscInt nsubcomm,np_subcomm,nleftover,i,j,color; 673 PetscMPIInt rank,size,subrank,*subsize; 674 MPI_Comm subcomm; 675 PetscMPIInt duprank; 676 PetscMPIInt rank_dup,size_dup; 677 MPI_Comm dupcomm; 678 679 PetscFunctionBegin; 680 ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr); 681 ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr); 682 red->useparallelmat = PETSC_TRUE; 683 684 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 685 ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr); 686 nsubcomm = size; 687 ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr); 688 if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size); 689 690 /* get size of each subcommunicators */ 691 ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr); 692 np_subcomm = size/nsubcomm; 693 nleftover = size - nsubcomm*np_subcomm; 694 for (i=0; i<nsubcomm; i++){ 695 subsize[i] = np_subcomm; 696 if (i<nleftover) subsize[i]++; 697 } 698 699 /* find color for this proc */ 700 color = rank%nsubcomm; 701 subrank = rank/nsubcomm; 702 703 ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr); 704 red->subcomm = subcomm; 705 red->color = color; 706 red->nsubcomm = nsubcomm; 707 708 j = 0; duprank = 0; 709 for (i=0; i<nsubcomm; i++){ 710 if (j == color){ 711 duprank += subrank; 712 break; 713 } 714 duprank += subsize[i]; j++; 715 } 716 /* 717 ierr = PetscSynchronizedPrintf(pc->comm, "[%d] color %d, subrank %d, duprank %d\n",rank,color,subrank,duprank); 718 ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr); 719 */ 720 721 ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr); 722 ierr = MPI_Comm_rank(dupcomm,&rank_dup);CHKERRQ(ierr); 723 ierr = MPI_Comm_size(dupcomm,&size_dup);CHKERRQ(ierr); 724 /* 725 ierr = PetscSynchronizedPrintf(pc->comm, "[%d] duprank %d\n",rank,duprank); 726 ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr); 727 */ 728 red->dupcomm = dupcomm; 729 ierr = PetscFree(subsize);CHKERRQ(ierr); 730 731 /* create the sequential PC that each processor has copy of */ 732 ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr); 733 ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr); 734 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 735 ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr); 736 ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr); 737 738 pc->ops->apply = PCApply_Redundant; 739 pc->ops->applytranspose = 0; 740 pc->ops->setup = PCSetUp_Redundant; 741 pc->ops->destroy = PCDestroy_Redundant; 742 pc->ops->setfromoptions = PCSetFromOptions_Redundant; 743 pc->ops->setuponblocks = 0; 744 pc->ops->view = PCView_Redundant; 745 pc->ops->applyrichardson = 0; 746 747 pc->data = (void*)red; 748 749 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant", 750 PCRedundantSetScatter_Redundant);CHKERRQ(ierr); 751 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant", 752 PCRedundantGetPC_Redundant);CHKERRQ(ierr); 753 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant", 754 PCRedundantGetOperators_Redundant);CHKERRQ(ierr); 755 756 PetscFunctionReturn(0); 757 } 758 EXTERN_C_END 759