1 #define PETSCKSP_DLL 2 3 /* 4 This file defines a "solve the problem redundantly on each subgroup of processor" preconditioner. 5 */ 6 #include "private/pcimpl.h" /*I "petscpc.h" I*/ 7 #include "petscksp.h" 8 9 typedef struct { 10 PC pc; /* actual preconditioner used on each processor */ 11 Vec xsub,ysub; /* vectors of a subcommunicator to hold parallel vectors of pc->comm */ 12 Vec xdup,ydup; /* parallel vector that congregates xsub or ysub facilitating vector scattering */ 13 Mat *pmats; /* matrix and optional preconditioner matrix */ 14 Mat pmats_sub; /* matrix and optional preconditioner matrix belong to a subcommunicator */ 15 VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */ 16 PetscTruth useparallelmat; 17 MPI_Comm subcomm; /* processors belong to a subcommunicator implement a PC in parallel */ 18 MPI_Comm dupcomm; /* processors belong to pc->comm with their rank remapped in the way 19 that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */ 20 PetscInt nsubcomm; /* num of subcommunicators, which equals the num of redundant matrix systems */ 21 PetscInt color; /* color of processors in a subcommunicator */ 22 } PC_Redundant; 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "PCView_Redundant" 26 static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer) 27 { 28 PC_Redundant *red = (PC_Redundant*)pc->data; 29 PetscErrorCode ierr; 30 PetscMPIInt rank; 31 PetscTruth iascii,isstring; 32 PetscViewer sviewer; 33 34 PetscFunctionBegin; 35 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 36 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 37 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr); 38 if (iascii) { 39 ierr = PetscViewerASCIIPrintf(viewer," Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr); 40 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 41 if (!rank) { 42 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 43 ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 44 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 45 } 46 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 47 } else if (isstring) { 48 ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr); 49 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 50 if (!rank) { 51 ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 52 } 53 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 54 } else { 55 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name); 56 } 57 PetscFunctionReturn(0); 58 } 59 60 #include "src/mat/matimpl.h" /*I "petscmat.h" I*/ 61 #include "private/vecimpl.h" 62 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 63 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 64 65 #undef __FUNCT__ 66 #define __FUNCT__ "MatGetRedundantMatrix" 67 PetscErrorCode MatGetRedundantMatrix_AIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 68 { 69 PetscMPIInt rank,size,subrank,subsize; 70 MPI_Comm comm=mat->comm; 71 PetscErrorCode ierr; 72 PetscInt nsends,nrecvs,i,prid=100,itmp; 73 PetscMPIInt *send_rank,*recv_rank; 74 PetscInt *rowrange=mat->rmap.range,nzlocal; 75 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 76 Mat A=aij->A,B=aij->B; 77 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 78 Mat C; 79 80 PetscInt nleftover,np_subcomm,j; 81 PetscInt nz_A,nz_B,*sbuf_j; 82 PetscScalar *sbuf_a; 83 PetscInt cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 84 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 85 PetscInt *cols,ctmp,lwrite,*rptr,l; 86 PetscScalar *vals,*aworkA,*aworkB; 87 PetscMPIInt tag1,tag2,tag3,imdex; 88 MPI_Request *s_waits1,*s_waits2,*s_waits3,*r_waits1,*r_waits2,*r_waits3; 89 MPI_Status recv_status,*send_status; 90 PetscInt *sbuf_nz,*rbuf_nz,count; 91 PetscInt **rbuf_j; 92 PetscScalar **rbuf_a; 93 94 PetscFunctionBegin; 95 ierr = PetscOptionsGetInt(PETSC_NULL,"-prid",&prid,PETSC_NULL);CHKERRQ(ierr); 96 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 97 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 98 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 99 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 100 /* 101 ierr = PetscSynchronizedPrintf(comm, "[%d] subrank %d, subsize %d\n",rank,subrank,subsize); 102 ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 103 */ 104 /* get the destination processors */ 105 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 106 recv_rank = send_rank + size; 107 np_subcomm = size/nsubcomm; 108 nleftover = size - nsubcomm*np_subcomm; 109 nsends = 0; nrecvs = 0; 110 for (i=0; i<size; i++){ /* i=rank*/ 111 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 112 send_rank[nsends] = i; nsends++; 113 recv_rank[nrecvs++] = i; 114 } 115 } 116 if (rank >= size - nleftover){/* this proc is a leftover processor */ 117 i = size-nleftover-1; 118 j = 0; 119 while (j < nsubcomm - nleftover){ 120 send_rank[nsends++] = i; 121 i--; j++; 122 } 123 } 124 125 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 126 for (i=0; i<nleftover; i++){ 127 recv_rank[nrecvs++] = size-nleftover+i; 128 } 129 } 130 if (prid == rank){ 131 printf("[%d] sends to ",rank); 132 for (i=0; i<nsends; i++) printf(" [%d],",send_rank[i]); 133 printf(" \n"); 134 } 135 /* 136 ierr = PetscSynchronizedPrintf(comm, "[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs); 137 ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 138 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 139 */ 140 141 /* get this processor's nzlocal=nz_A+nz_B */ 142 nz_A = a->nz; nz_B = b->nz; 143 nzlocal = nz_A + nz_B; 144 145 /* allocate sbuf_j, sbuf_a, then copy mat's local entries into the buffers */ 146 itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 147 ierr = PetscMalloc(itmp*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 148 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 149 150 rptr = sbuf_j; 151 cols = sbuf_j + rend-rstart + 1; 152 vals = sbuf_a; 153 rptr[0] = 0; 154 for (i=0; i<rend-rstart; i++){ 155 row = i + rstart; 156 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 157 ncols = nzA + nzB; 158 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 159 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 160 /* load the column indices for this row into cols */ 161 lwrite = 0; 162 for (l=0; l<nzB; l++) { 163 if ((ctmp = bmap[cworkB[l]]) < cstart){ 164 vals[lwrite] = aworkB[l]; 165 cols[lwrite++] = ctmp; 166 } 167 } 168 for (l=0; l<nzA; l++){ 169 vals[lwrite] = aworkA[l]; 170 cols[lwrite++] = cstart + cworkA[l]; 171 } 172 for (l=0; l<nzB; l++) { 173 if ((ctmp = bmap[cworkB[l]]) >= cend){ 174 vals[lwrite] = aworkB[l]; 175 cols[lwrite++] = ctmp; 176 } 177 } 178 /* insert local matrix values into C */ 179 /* ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); */ 180 181 vals += ncols; 182 cols += ncols; 183 rptr[i+1] = rptr[i] + ncols; 184 } 185 /* 186 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 187 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 188 */ 189 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 190 191 /* send nzlocal to others, and recv other's nzlocal */ 192 /*--------------------------------------------------*/ 193 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 194 rbuf_nz = sbuf_nz + nsends; 195 196 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits1,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 197 s_waits2 = s_waits1 + nsends; 198 s_waits3 = s_waits2 + nsends; 199 r_waits1 = s_waits3 + nsends; 200 r_waits2 = r_waits1 + nrecvs; 201 r_waits3 = r_waits2 + nrecvs; 202 203 /* get some new tags to keep the communication clean */ 204 ierr = PetscObjectGetNewTag((PetscObject)A,&tag1);CHKERRQ(ierr); 205 ierr = PetscObjectGetNewTag((PetscObject)A,&tag2);CHKERRQ(ierr); 206 ierr = PetscObjectGetNewTag((PetscObject)A,&tag3);CHKERRQ(ierr); 207 208 /* post receives of other's nzlocal */ 209 for (i=0; i<nrecvs; i++){ 210 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 211 } 212 213 /* send nzlocal to others */ 214 for (i=0; i<nsends; i++){ 215 sbuf_nz[i] = nzlocal; 216 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 217 if (prid == rank) printf(" [%d] sends nz %d to [%d]\n",rank,nzlocal,send_rank[i]); 218 } 219 220 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 221 count = nrecvs; 222 while (count) { 223 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 224 recv_rank[imdex] = recv_status.MPI_SOURCE; 225 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_a and rbuf_j */ 226 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 227 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_status.MPI_SOURCE,tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 228 229 itmp = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 230 rbuf_nz[imdex] += itmp+2; 231 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 232 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 233 count--; 234 } 235 236 /* wait on sends of nzlocal */ 237 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 238 239 /* send mat->i,j and mat->a to others, and recv from other's */ 240 /*-----------------------------------------------------------*/ 241 for (i=0; i<nsends; i++){ 242 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 243 itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 244 ierr = MPI_Isend(sbuf_j,itmp,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 245 } 246 247 /* wait on receives of mat->i,j and mat->a */ 248 /*-----------------------------------------*/ 249 count = nrecvs; 250 while (count) { 251 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 252 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 253 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 254 count--; 255 } 256 257 /* wait on sends of mat->i,j and mat->a */ 258 /*--------------------------------------*/ 259 if (nsends) { 260 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 261 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 262 } 263 ierr = PetscFree(sbuf_nz);CHKERRQ(ierr); 264 ierr = PetscFree2(s_waits1,send_status);CHKERRQ(ierr); 265 266 /* create redundant matrix */ 267 /*-------------------------*/ 268 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 269 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 270 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 271 272 /* insert local matrix entries */ 273 rptr = sbuf_j; 274 cols = sbuf_j + rend-rstart + 1; 275 vals = sbuf_a; 276 for (i=0; i<rend-rstart; i++){ 277 row = i + rstart; 278 ncols = rptr[i+1] - rptr[i]; 279 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 280 vals += ncols; 281 cols += ncols; 282 } 283 /* insert received matrix entries */ 284 for (imdex=0; imdex<nrecvs; imdex++){ 285 286 rstart = rowrange[recv_rank[imdex]]; 287 rend = rowrange[recv_rank[imdex]+1]; 288 rptr = rbuf_j[imdex]; 289 cols = rbuf_j[imdex] + rend-rstart + 1; 290 vals = rbuf_a[imdex]; 291 for (i=0; i<rend-rstart; i++){ 292 row = i + rstart; 293 ncols = rptr[i+1] - rptr[i]; 294 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 295 vals += ncols; 296 cols += ncols; 297 } 298 } 299 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 300 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 301 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 302 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 303 *matredundant = C; 304 305 /* free space */ 306 ierr = PetscFree(send_rank);CHKERRQ(ierr); 307 ierr = PetscFree(sbuf_j);CHKERRQ(ierr); 308 ierr = PetscFree(sbuf_a);CHKERRQ(ierr); 309 for (i=0; i<nrecvs; i++){ 310 ierr = PetscFree(rbuf_j[i]);CHKERRQ(ierr); 311 ierr = PetscFree(rbuf_a[i]);CHKERRQ(ierr); 312 } 313 ierr = PetscFree3(sbuf_nz,rbuf_j,rbuf_a);CHKERRQ(ierr); 314 PetscFunctionReturn(0); 315 } 316 317 #undef __FUNCT__ 318 #define __FUNCT__ "PCSetUp_Redundant" 319 static PetscErrorCode PCSetUp_Redundant(PC pc) 320 { 321 PC_Redundant *red = (PC_Redundant*)pc->data; 322 PetscErrorCode ierr; 323 PetscInt mstart,mend,mlocal,m; 324 PetscMPIInt size; 325 IS isl; 326 MatReuse reuse = MAT_INITIAL_MATRIX; 327 MatStructure str = DIFFERENT_NONZERO_PATTERN; 328 MPI_Comm comm; 329 Vec vec; 330 331 PetscInt mlocal_sub; 332 PetscMPIInt subsize,subrank; 333 PetscInt rstart_sub,rend_sub,mloc_sub; 334 Mat Aredundant; 335 336 PetscFunctionBegin; 337 ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr); 338 ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr); 339 ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 340 ierr = VecGetSize(vec,&m);CHKERRQ(ierr); 341 if (!pc->setupcalled) { 342 /* create working vectors xsub/ysub and xdup/ydup */ 343 ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr); 344 ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr); 345 346 /* get local size of xsub/ysub */ 347 ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr); 348 ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr); 349 rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */ 350 if (subrank+1 < subsize){ 351 rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)]; 352 } else { 353 rend_sub = m; 354 } 355 mloc_sub = rend_sub - rstart_sub; 356 ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr); 357 /* create xsub with empty local arrays, because xdup's arrays will be placed into it */ 358 ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr); 359 360 /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it. 361 Note: we use communicator dupcomm, not pc->comm! */ 362 ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr); 363 ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr); 364 365 /* create vec scatters */ 366 if (!red->scatterin){ 367 IS is1,is2; 368 PetscInt *idx1,*idx2,i,j,k; 369 ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr); 370 idx2 = idx1 + red->nsubcomm*mlocal; 371 j = 0; 372 for (k=0; k<red->nsubcomm; k++){ 373 for (i=mstart; i<mend; i++){ 374 idx1[j] = i; 375 idx2[j++] = i + m*k; 376 } 377 } 378 ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr); 379 ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr); 380 ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr); 381 ierr = ISDestroy(is1);CHKERRQ(ierr); 382 ierr = ISDestroy(is2);CHKERRQ(ierr); 383 384 ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr); 385 ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr); 386 ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr); 387 ierr = ISDestroy(is1);CHKERRQ(ierr); 388 ierr = ISDestroy(is2);CHKERRQ(ierr); 389 ierr = PetscFree(idx1);CHKERRQ(ierr); 390 } 391 } 392 ierr = VecDestroy(vec);CHKERRQ(ierr); 393 394 /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */ 395 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 396 if (size == 1) { 397 red->useparallelmat = PETSC_FALSE; 398 } 399 400 if (red->useparallelmat) { 401 if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) { 402 /* destroy old matrices */ 403 if (red->pmats) { 404 ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr); 405 } 406 } else if (pc->setupcalled == 1) { 407 reuse = MAT_REUSE_MATRIX; 408 str = SAME_NONZERO_PATTERN; 409 } 410 411 /* grab the parallel matrix and put it into processors of a subcomminicator */ 412 /*--------------------------------------------------------------------------*/ 413 if (red->nsubcomm == size){ 414 /* create sequential matrices in each processor */ 415 ierr = ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);CHKERRQ(ierr); 416 ierr = MatGetSubMatrices(pc->pmat,1,&isl,&isl,reuse,&red->pmats);CHKERRQ(ierr); 417 ierr = ISDestroy(isl);CHKERRQ(ierr); 418 ierr = PCSetOperators(red->pc,red->pmats[0],red->pmats[0],str);CHKERRQ(ierr); 419 } else { 420 /* create mpi matrices in each subcommunicator */ 421 ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr); 422 ierr = MatGetRedundantMatrix_AIJ(pc->pmat,red->nsubcomm,red->subcomm,mlocal_sub,reuse,&Aredundant);CHKERRQ(ierr); 423 red->pmats_sub = Aredundant; 424 /* tell PC of the subcommunicator its operators */ 425 ierr = PCSetOperators(red->pc,Aredundant,Aredundant,str);CHKERRQ(ierr); 426 } 427 } else { 428 ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr); 429 } 430 ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 431 ierr = PCSetUp(red->pc);CHKERRQ(ierr); 432 PetscFunctionReturn(0); 433 } 434 435 #undef __FUNCT__ 436 #define __FUNCT__ "PCApply_Redundant" 437 static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y) 438 { 439 PC_Redundant *red = (PC_Redundant*)pc->data; 440 PetscErrorCode ierr; 441 PetscScalar *array; 442 443 PetscFunctionBegin; 444 /* scatter x to xdup */ 445 ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 446 ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 447 448 /* place xdup's local array into xsub */ 449 ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr); 450 ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr); 451 452 /* apply preconditioner on each processor */ 453 ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr); 454 ierr = VecResetArray(red->xsub);CHKERRQ(ierr); 455 ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr); 456 457 /* place ysub's local array into ydup */ 458 ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr); 459 ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr); 460 461 /* scatter ydup to y */ 462 ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 463 ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 464 ierr = VecResetArray(red->ydup);CHKERRQ(ierr); 465 ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr); 466 PetscFunctionReturn(0); 467 } 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "PCDestroy_Redundant" 471 static PetscErrorCode PCDestroy_Redundant(PC pc) 472 { 473 PC_Redundant *red = (PC_Redundant*)pc->data; 474 PetscErrorCode ierr; 475 476 PetscFunctionBegin; 477 if (red->scatterin) {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);} 478 if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);} 479 if (red->ysub) {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);} 480 if (red->xsub) {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);} 481 if (red->xdup) {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);} 482 if (red->ydup) {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);} 483 if (red->pmats_sub) { 484 ierr = MatDestroy(red->pmats_sub);CHKERRQ(ierr); 485 } 486 487 if (red->pmats) { 488 ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr); 489 } 490 ierr = PCDestroy(red->pc);CHKERRQ(ierr); 491 ierr = PetscFree(red);CHKERRQ(ierr); 492 PetscFunctionReturn(0); 493 } 494 495 #undef __FUNCT__ 496 #define __FUNCT__ "PCSetFromOptions_Redundant" 497 static PetscErrorCode PCSetFromOptions_Redundant(PC pc) 498 { 499 PetscFunctionBegin; 500 PetscFunctionReturn(0); 501 } 502 503 EXTERN_C_BEGIN 504 #undef __FUNCT__ 505 #define __FUNCT__ "PCRedundantSetScatter_Redundant" 506 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out) 507 { 508 PC_Redundant *red = (PC_Redundant*)pc->data; 509 PetscErrorCode ierr; 510 511 PetscFunctionBegin; 512 red->scatterin = in; 513 red->scatterout = out; 514 ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr); 515 ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr); 516 PetscFunctionReturn(0); 517 } 518 EXTERN_C_END 519 520 #undef __FUNCT__ 521 #define __FUNCT__ "PCRedundantSetScatter" 522 /*@ 523 PCRedundantSetScatter - Sets the scatter used to copy values into the 524 redundant local solve and the scatter to move them back into the global 525 vector. 526 527 Collective on PC 528 529 Input Parameters: 530 + pc - the preconditioner context 531 . in - the scatter to move the values in 532 - out - the scatter to move them out 533 534 Level: advanced 535 536 .keywords: PC, redundant solve 537 @*/ 538 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out) 539 { 540 PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter); 541 542 PetscFunctionBegin; 543 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 544 PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2); 545 PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3); 546 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr); 547 if (f) { 548 ierr = (*f)(pc,in,out);CHKERRQ(ierr); 549 } 550 PetscFunctionReturn(0); 551 } 552 553 EXTERN_C_BEGIN 554 #undef __FUNCT__ 555 #define __FUNCT__ "PCRedundantGetPC_Redundant" 556 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc) 557 { 558 PC_Redundant *red = (PC_Redundant*)pc->data; 559 560 PetscFunctionBegin; 561 *innerpc = red->pc; 562 PetscFunctionReturn(0); 563 } 564 EXTERN_C_END 565 566 #undef __FUNCT__ 567 #define __FUNCT__ "PCRedundantGetPC" 568 /*@ 569 PCRedundantGetPC - Gets the sequential PC created by the redundant PC. 570 571 Not Collective 572 573 Input Parameter: 574 . pc - the preconditioner context 575 576 Output Parameter: 577 . innerpc - the sequential PC 578 579 Level: advanced 580 581 .keywords: PC, redundant solve 582 @*/ 583 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc) 584 { 585 PetscErrorCode ierr,(*f)(PC,PC*); 586 587 PetscFunctionBegin; 588 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 589 PetscValidPointer(innerpc,2); 590 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr); 591 if (f) { 592 ierr = (*f)(pc,innerpc);CHKERRQ(ierr); 593 } 594 PetscFunctionReturn(0); 595 } 596 597 EXTERN_C_BEGIN 598 #undef __FUNCT__ 599 #define __FUNCT__ "PCRedundantGetOperators_Redundant" 600 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat) 601 { 602 PC_Redundant *red = (PC_Redundant*)pc->data; 603 604 PetscFunctionBegin; 605 if (mat) *mat = red->pmats[0]; 606 if (pmat) *pmat = red->pmats[0]; 607 PetscFunctionReturn(0); 608 } 609 EXTERN_C_END 610 611 #undef __FUNCT__ 612 #define __FUNCT__ "PCRedundantGetOperators" 613 /*@ 614 PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix 615 616 Not Collective 617 618 Input Parameter: 619 . pc - the preconditioner context 620 621 Output Parameters: 622 + mat - the matrix 623 - pmat - the (possibly different) preconditioner matrix 624 625 Level: advanced 626 627 .keywords: PC, redundant solve 628 @*/ 629 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat) 630 { 631 PetscErrorCode ierr,(*f)(PC,Mat*,Mat*); 632 633 PetscFunctionBegin; 634 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 635 if (mat) PetscValidPointer(mat,2); 636 if (pmat) PetscValidPointer(pmat,3); 637 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr); 638 if (f) { 639 ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr); 640 } 641 PetscFunctionReturn(0); 642 } 643 644 /* -------------------------------------------------------------------------------------*/ 645 /*MC 646 PCREDUNDANT - Runs a preconditioner for the entire problem on each processor 647 648 649 Options for the redundant preconditioners can be set with -redundant_pc_xxx 650 651 Level: intermediate 652 653 654 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(), 655 PCRedundantGetPC(), PCRedundantGetOperators() 656 657 M*/ 658 659 EXTERN_C_BEGIN 660 #undef __FUNCT__ 661 #define __FUNCT__ "PCCreate_Redundant" 662 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc) 663 { 664 PetscErrorCode ierr; 665 PC_Redundant *red; 666 const char *prefix; 667 668 PetscInt nsubcomm,np_subcomm,nleftover,i,j,color; 669 PetscMPIInt rank,size,subrank,*subsize; 670 MPI_Comm subcomm; 671 PetscMPIInt duprank; 672 PetscMPIInt rank_dup,size_dup; 673 MPI_Comm dupcomm; 674 675 PetscFunctionBegin; 676 ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr); 677 ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr); 678 red->useparallelmat = PETSC_TRUE; 679 680 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 681 ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr); 682 nsubcomm = size; 683 ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr); 684 if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size); 685 686 /* get size of each subcommunicators */ 687 ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr); 688 np_subcomm = size/nsubcomm; 689 nleftover = size - nsubcomm*np_subcomm; 690 for (i=0; i<nsubcomm; i++){ 691 subsize[i] = np_subcomm; 692 if (i<nleftover) subsize[i]++; 693 } 694 695 /* find color for this proc */ 696 color = rank%nsubcomm; 697 subrank = rank/nsubcomm; 698 699 ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr); 700 red->subcomm = subcomm; 701 red->color = color; 702 red->nsubcomm = nsubcomm; 703 704 j = 0; duprank = 0; 705 for (i=0; i<nsubcomm; i++){ 706 if (j == color){ 707 duprank += subrank; 708 break; 709 } 710 duprank += subsize[i]; j++; 711 } 712 /* 713 ierr = PetscSynchronizedPrintf(pc->comm, "[%d] color %d, subrank %d, duprank %d\n",rank,color,subrank,duprank); 714 ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr); 715 */ 716 717 ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr); 718 ierr = MPI_Comm_rank(dupcomm,&rank_dup);CHKERRQ(ierr); 719 ierr = MPI_Comm_size(dupcomm,&size_dup);CHKERRQ(ierr); 720 /* 721 ierr = PetscSynchronizedPrintf(pc->comm, "[%d] duprank %d\n",rank,duprank); 722 ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr); 723 */ 724 red->dupcomm = dupcomm; 725 ierr = PetscFree(subsize);CHKERRQ(ierr); 726 727 /* create the sequential PC that each processor has copy of */ 728 ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr); 729 ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr); 730 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 731 ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr); 732 ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr); 733 734 pc->ops->apply = PCApply_Redundant; 735 pc->ops->applytranspose = 0; 736 pc->ops->setup = PCSetUp_Redundant; 737 pc->ops->destroy = PCDestroy_Redundant; 738 pc->ops->setfromoptions = PCSetFromOptions_Redundant; 739 pc->ops->setuponblocks = 0; 740 pc->ops->view = PCView_Redundant; 741 pc->ops->applyrichardson = 0; 742 743 pc->data = (void*)red; 744 745 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant", 746 PCRedundantSetScatter_Redundant);CHKERRQ(ierr); 747 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant", 748 PCRedundantGetPC_Redundant);CHKERRQ(ierr); 749 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant", 750 PCRedundantGetOperators_Redundant);CHKERRQ(ierr); 751 752 PetscFunctionReturn(0); 753 } 754 EXTERN_C_END 755