1 #define PETSCKSP_DLL 2 3 /* 4 This file defines a "solve the problem redundantly on each subgroup of processor" preconditioner. 5 */ 6 #include "private/pcimpl.h" /*I "petscpc.h" I*/ 7 #include "petscksp.h" 8 9 typedef struct { 10 PC pc; /* actual preconditioner used on each processor */ 11 Vec xsub,ysub; /* vectors of a subcommunicator to hold parallel vectors of pc->comm */ 12 Vec xdup,ydup; /* parallel vector that congregates xsub or ysub facilitating vector scattering */ 13 Mat *pmats; /* matrix and optional preconditioner matrix */ 14 Mat pmats_sub; /* matrix and optional preconditioner matrix belong to a subcommunicator */ 15 VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */ 16 PetscTruth useparallelmat; 17 MPI_Comm subcomm; /* processors belong to a subcommunicator implement a PC in parallel */ 18 MPI_Comm dupcomm; /* processors belong to pc->comm with their rank remapped in the way 19 that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */ 20 PetscInt nsubcomm; /* num of subcommunicators, which equals the num of redundant matrix systems */ 21 PetscInt color; /* color of processors in a subcommunicator */ 22 } PC_Redundant; 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "PCView_Redundant" 26 static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer) 27 { 28 PC_Redundant *red = (PC_Redundant*)pc->data; 29 PetscErrorCode ierr; 30 PetscMPIInt rank; 31 PetscTruth iascii,isstring; 32 PetscViewer sviewer; 33 34 PetscFunctionBegin; 35 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 36 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 37 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr); 38 if (iascii) { 39 ierr = PetscViewerASCIIPrintf(viewer," Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr); 40 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 41 if (!rank) { 42 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 43 ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 44 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 45 } 46 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 47 } else if (isstring) { 48 ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr); 49 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 50 if (!rank) { 51 ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 52 } 53 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 54 } else { 55 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name); 56 } 57 PetscFunctionReturn(0); 58 } 59 60 #include "src/mat/matimpl.h" /*I "petscmat.h" I*/ 61 #include "private/vecimpl.h" 62 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 63 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 64 65 #undef __FUNCT__ 66 #define __FUNCT__ "MatGetRedundantMatrix" 67 PetscErrorCode MatGetRedundantMatrix_AIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 68 { 69 PetscMPIInt rank,size,subrank,subsize; 70 MPI_Comm comm=mat->comm; 71 PetscErrorCode ierr; 72 PetscInt nsends,nrecvs,i,prid=100,itmp; 73 PetscMPIInt *send_rank,*recv_rank; 74 PetscInt *rowrange=mat->rmap.range,nzlocal; 75 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 76 Mat A=aij->A,B=aij->B; 77 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 78 Mat C=*matredundant; 79 80 PetscInt nleftover,np_subcomm,j; 81 PetscInt nz_A,nz_B,*sbuf_j; 82 PetscScalar *sbuf_a; 83 PetscInt cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 84 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 85 PetscInt *cols,ctmp,lwrite,*rptr,l; 86 PetscScalar *vals,*aworkA,*aworkB; 87 PetscMPIInt tag1,tag2,tag3,imdex; 88 MPI_Request *s_waits1,*s_waits2,*s_waits3,*r_waits1,*r_waits2,*r_waits3; 89 MPI_Status recv_status,*send_status; 90 PetscInt *sbuf_nz,*rbuf_nz,count; 91 PetscInt **rbuf_j; 92 PetscScalar **rbuf_a; 93 94 PetscFunctionBegin; 95 if (reuse == MAT_REUSE_MATRIX) { 96 PetscTruth flag; 97 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 98 if (M != N || M != mat->rmap.N) { 99 SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong global size"); 100 } 101 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 102 if (M != N || M != mlocal_sub) { 103 SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong local size"); 104 } 105 } 106 107 ierr = PetscOptionsGetInt(PETSC_NULL,"-prid",&prid,PETSC_NULL);CHKERRQ(ierr); 108 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 109 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 110 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 111 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 112 /* 113 ierr = PetscSynchronizedPrintf(comm, "[%d] subrank %d, subsize %d\n",rank,subrank,subsize); 114 ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 115 */ 116 /* get the destination processors */ 117 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 118 recv_rank = send_rank + size; 119 np_subcomm = size/nsubcomm; 120 nleftover = size - nsubcomm*np_subcomm; 121 nsends = 0; nrecvs = 0; 122 for (i=0; i<size; i++){ /* i=rank*/ 123 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 124 send_rank[nsends] = i; nsends++; 125 recv_rank[nrecvs++] = i; 126 } 127 } 128 if (rank >= size - nleftover){/* this proc is a leftover processor */ 129 i = size-nleftover-1; 130 j = 0; 131 while (j < nsubcomm - nleftover){ 132 send_rank[nsends++] = i; 133 i--; j++; 134 } 135 } 136 137 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 138 for (i=0; i<nleftover; i++){ 139 recv_rank[nrecvs++] = size-nleftover+i; 140 } 141 } 142 if (prid == rank){ 143 printf("[%d] sends to ",rank); 144 for (i=0; i<nsends; i++) printf(" [%d],",send_rank[i]); 145 printf(" \n"); 146 } 147 /* 148 ierr = PetscSynchronizedPrintf(comm, "[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs); 149 ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 150 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 151 */ 152 153 /* get this processor's nzlocal=nz_A+nz_B */ 154 nz_A = a->nz; nz_B = b->nz; 155 nzlocal = nz_A + nz_B; 156 157 /* allocate sbuf_j, sbuf_a, then copy mat's local entries into the buffers */ 158 itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 159 ierr = PetscMalloc(itmp*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 160 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 161 162 rptr = sbuf_j; 163 cols = sbuf_j + rend-rstart + 1; 164 vals = sbuf_a; 165 rptr[0] = 0; 166 for (i=0; i<rend-rstart; i++){ 167 row = i + rstart; 168 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 169 ncols = nzA + nzB; 170 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 171 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 172 /* load the column indices for this row into cols */ 173 lwrite = 0; 174 for (l=0; l<nzB; l++) { 175 if ((ctmp = bmap[cworkB[l]]) < cstart){ 176 vals[lwrite] = aworkB[l]; 177 cols[lwrite++] = ctmp; 178 } 179 } 180 for (l=0; l<nzA; l++){ 181 vals[lwrite] = aworkA[l]; 182 cols[lwrite++] = cstart + cworkA[l]; 183 } 184 for (l=0; l<nzB; l++) { 185 if ((ctmp = bmap[cworkB[l]]) >= cend){ 186 vals[lwrite] = aworkB[l]; 187 cols[lwrite++] = ctmp; 188 } 189 } 190 /* insert local matrix values into C */ 191 /* ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); */ 192 193 vals += ncols; 194 cols += ncols; 195 rptr[i+1] = rptr[i] + ncols; 196 } 197 /* 198 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 199 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 200 */ 201 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 202 203 /* send nzlocal to others, and recv other's nzlocal */ 204 /*--------------------------------------------------*/ 205 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 206 rbuf_nz = sbuf_nz + nsends; 207 208 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits1,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 209 s_waits2 = s_waits1 + nsends; 210 s_waits3 = s_waits2 + nsends; 211 r_waits1 = s_waits3 + nsends; 212 r_waits2 = r_waits1 + nrecvs; 213 r_waits3 = r_waits2 + nrecvs; 214 215 /* get some new tags to keep the communication clean */ 216 ierr = PetscObjectGetNewTag((PetscObject)A,&tag1);CHKERRQ(ierr); 217 ierr = PetscObjectGetNewTag((PetscObject)A,&tag2);CHKERRQ(ierr); 218 ierr = PetscObjectGetNewTag((PetscObject)A,&tag3);CHKERRQ(ierr); 219 220 /* post receives of other's nzlocal */ 221 for (i=0; i<nrecvs; i++){ 222 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 223 } 224 225 /* send nzlocal to others */ 226 for (i=0; i<nsends; i++){ 227 sbuf_nz[i] = nzlocal; 228 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 229 if (prid == rank) printf(" [%d] sends nz %d to [%d]\n",rank,nzlocal,send_rank[i]); 230 } 231 232 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 233 count = nrecvs; 234 while (count) { 235 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 236 recv_rank[imdex] = recv_status.MPI_SOURCE; 237 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_a and rbuf_j */ 238 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 239 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_status.MPI_SOURCE,tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 240 241 itmp = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 242 rbuf_nz[imdex] += itmp+2; 243 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 244 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 245 count--; 246 } 247 248 /* wait on sends of nzlocal */ 249 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 250 251 /* send mat->i,j and mat->a to others, and recv from other's */ 252 /*-----------------------------------------------------------*/ 253 for (i=0; i<nsends; i++){ 254 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 255 itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 256 ierr = MPI_Isend(sbuf_j,itmp,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 257 } 258 259 /* wait on receives of mat->i,j and mat->a */ 260 /*-----------------------------------------*/ 261 count = nrecvs; 262 while (count) { 263 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 264 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 265 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 266 count--; 267 } 268 269 /* wait on sends of mat->i,j and mat->a */ 270 /*--------------------------------------*/ 271 if (nsends) { 272 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 273 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 274 } 275 ierr = PetscFree(sbuf_nz);CHKERRQ(ierr); 276 ierr = PetscFree2(s_waits1,send_status);CHKERRQ(ierr); 277 278 /* create redundant matrix */ 279 /*-------------------------*/ 280 if (reuse == MAT_INITIAL_MATRIX){ 281 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 282 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 283 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 284 } else { 285 C = *matredundant; 286 } 287 /* insert local matrix entries */ 288 rptr = sbuf_j; 289 cols = sbuf_j + rend-rstart + 1; 290 vals = sbuf_a; 291 for (i=0; i<rend-rstart; i++){ 292 row = i + rstart; 293 ncols = rptr[i+1] - rptr[i]; 294 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 295 vals += ncols; 296 cols += ncols; 297 } 298 /* insert received matrix entries */ 299 for (imdex=0; imdex<nrecvs; imdex++){ 300 301 rstart = rowrange[recv_rank[imdex]]; 302 rend = rowrange[recv_rank[imdex]+1]; 303 rptr = rbuf_j[imdex]; 304 cols = rbuf_j[imdex] + rend-rstart + 1; 305 vals = rbuf_a[imdex]; 306 for (i=0; i<rend-rstart; i++){ 307 row = i + rstart; 308 ncols = rptr[i+1] - rptr[i]; 309 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 310 vals += ncols; 311 cols += ncols; 312 } 313 } 314 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 315 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 316 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 317 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 318 if (reuse == MAT_INITIAL_MATRIX){ 319 *matredundant = C; 320 } 321 322 /* free space */ 323 ierr = PetscFree(send_rank);CHKERRQ(ierr); 324 ierr = PetscFree(sbuf_j);CHKERRQ(ierr); 325 ierr = PetscFree(sbuf_a);CHKERRQ(ierr); 326 for (i=0; i<nrecvs; i++){ 327 ierr = PetscFree(rbuf_j[i]);CHKERRQ(ierr); 328 ierr = PetscFree(rbuf_a[i]);CHKERRQ(ierr); 329 } 330 ierr = PetscFree3(sbuf_nz,rbuf_j,rbuf_a);CHKERRQ(ierr); 331 PetscFunctionReturn(0); 332 } 333 334 #undef __FUNCT__ 335 #define __FUNCT__ "PCSetUp_Redundant" 336 static PetscErrorCode PCSetUp_Redundant(PC pc) 337 { 338 PC_Redundant *red = (PC_Redundant*)pc->data; 339 PetscErrorCode ierr; 340 PetscInt mstart,mend,mlocal,m; 341 PetscMPIInt size; 342 IS isl; 343 MatReuse reuse = MAT_INITIAL_MATRIX; 344 MatStructure str = DIFFERENT_NONZERO_PATTERN; 345 MPI_Comm comm; 346 Vec vec; 347 348 PetscInt mlocal_sub; 349 PetscMPIInt subsize,subrank; 350 PetscInt rstart_sub,rend_sub,mloc_sub; 351 352 PetscFunctionBegin; 353 ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr); 354 ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr); 355 ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 356 ierr = VecGetSize(vec,&m);CHKERRQ(ierr); 357 if (!pc->setupcalled) { 358 /* create working vectors xsub/ysub and xdup/ydup */ 359 ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr); 360 ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr); 361 362 /* get local size of xsub/ysub */ 363 ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr); 364 ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr); 365 rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */ 366 if (subrank+1 < subsize){ 367 rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)]; 368 } else { 369 rend_sub = m; 370 } 371 mloc_sub = rend_sub - rstart_sub; 372 ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr); 373 /* create xsub with empty local arrays, because xdup's arrays will be placed into it */ 374 ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr); 375 376 /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it. 377 Note: we use communicator dupcomm, not pc->comm! */ 378 ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr); 379 ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr); 380 381 /* create vec scatters */ 382 if (!red->scatterin){ 383 IS is1,is2; 384 PetscInt *idx1,*idx2,i,j,k; 385 ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr); 386 idx2 = idx1 + red->nsubcomm*mlocal; 387 j = 0; 388 for (k=0; k<red->nsubcomm; k++){ 389 for (i=mstart; i<mend; i++){ 390 idx1[j] = i; 391 idx2[j++] = i + m*k; 392 } 393 } 394 ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr); 395 ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr); 396 ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr); 397 ierr = ISDestroy(is1);CHKERRQ(ierr); 398 ierr = ISDestroy(is2);CHKERRQ(ierr); 399 400 ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr); 401 ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr); 402 ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr); 403 ierr = ISDestroy(is1);CHKERRQ(ierr); 404 ierr = ISDestroy(is2);CHKERRQ(ierr); 405 ierr = PetscFree(idx1);CHKERRQ(ierr); 406 } 407 } 408 ierr = VecDestroy(vec);CHKERRQ(ierr); 409 410 /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */ 411 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 412 if (size == 1) { 413 red->useparallelmat = PETSC_FALSE; 414 } 415 416 if (red->useparallelmat) { 417 if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) { 418 /* destroy old matrices */ 419 if (red->pmats) { 420 ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr); 421 } 422 } else if (pc->setupcalled == 1) { 423 reuse = MAT_REUSE_MATRIX; 424 str = SAME_NONZERO_PATTERN; 425 } 426 427 /* grab the parallel matrix and put it into processors of a subcomminicator */ 428 /*--------------------------------------------------------------------------*/ 429 ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr); 430 ierr = MatGetRedundantMatrix_AIJ(pc->pmat,red->nsubcomm,red->subcomm,mlocal_sub,reuse,&red->pmats_sub);CHKERRQ(ierr); 431 /* tell PC of the subcommunicator its operators */ 432 ierr = PCSetOperators(red->pc,red->pmats_sub,red->pmats_sub,str);CHKERRQ(ierr); 433 } else { 434 ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr); 435 } 436 ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 437 ierr = PCSetUp(red->pc);CHKERRQ(ierr); 438 PetscFunctionReturn(0); 439 } 440 441 #undef __FUNCT__ 442 #define __FUNCT__ "PCApply_Redundant" 443 static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y) 444 { 445 PC_Redundant *red = (PC_Redundant*)pc->data; 446 PetscErrorCode ierr; 447 PetscScalar *array; 448 449 PetscFunctionBegin; 450 /* scatter x to xdup */ 451 ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 452 ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 453 454 /* place xdup's local array into xsub */ 455 ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr); 456 ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr); 457 458 /* apply preconditioner on each processor */ 459 ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr); 460 ierr = VecResetArray(red->xsub);CHKERRQ(ierr); 461 ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr); 462 463 /* place ysub's local array into ydup */ 464 ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr); 465 ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr); 466 467 /* scatter ydup to y */ 468 ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 469 ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 470 ierr = VecResetArray(red->ydup);CHKERRQ(ierr); 471 ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr); 472 PetscFunctionReturn(0); 473 } 474 475 #undef __FUNCT__ 476 #define __FUNCT__ "PCDestroy_Redundant" 477 static PetscErrorCode PCDestroy_Redundant(PC pc) 478 { 479 PC_Redundant *red = (PC_Redundant*)pc->data; 480 PetscErrorCode ierr; 481 482 PetscFunctionBegin; 483 if (red->scatterin) {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);} 484 if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);} 485 if (red->ysub) {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);} 486 if (red->xsub) {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);} 487 if (red->xdup) {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);} 488 if (red->ydup) {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);} 489 if (red->pmats_sub) { 490 ierr = MatDestroy(red->pmats_sub);CHKERRQ(ierr); 491 } 492 493 if (red->pmats) { 494 ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr); 495 } 496 ierr = PCDestroy(red->pc);CHKERRQ(ierr); 497 ierr = PetscFree(red);CHKERRQ(ierr); 498 PetscFunctionReturn(0); 499 } 500 501 #undef __FUNCT__ 502 #define __FUNCT__ "PCSetFromOptions_Redundant" 503 static PetscErrorCode PCSetFromOptions_Redundant(PC pc) 504 { 505 PetscFunctionBegin; 506 PetscFunctionReturn(0); 507 } 508 509 EXTERN_C_BEGIN 510 #undef __FUNCT__ 511 #define __FUNCT__ "PCRedundantSetScatter_Redundant" 512 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out) 513 { 514 PC_Redundant *red = (PC_Redundant*)pc->data; 515 PetscErrorCode ierr; 516 517 PetscFunctionBegin; 518 red->scatterin = in; 519 red->scatterout = out; 520 ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr); 521 ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr); 522 PetscFunctionReturn(0); 523 } 524 EXTERN_C_END 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "PCRedundantSetScatter" 528 /*@ 529 PCRedundantSetScatter - Sets the scatter used to copy values into the 530 redundant local solve and the scatter to move them back into the global 531 vector. 532 533 Collective on PC 534 535 Input Parameters: 536 + pc - the preconditioner context 537 . in - the scatter to move the values in 538 - out - the scatter to move them out 539 540 Level: advanced 541 542 .keywords: PC, redundant solve 543 @*/ 544 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out) 545 { 546 PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter); 547 548 PetscFunctionBegin; 549 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 550 PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2); 551 PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3); 552 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr); 553 if (f) { 554 ierr = (*f)(pc,in,out);CHKERRQ(ierr); 555 } 556 PetscFunctionReturn(0); 557 } 558 559 EXTERN_C_BEGIN 560 #undef __FUNCT__ 561 #define __FUNCT__ "PCRedundantGetPC_Redundant" 562 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc) 563 { 564 PC_Redundant *red = (PC_Redundant*)pc->data; 565 566 PetscFunctionBegin; 567 *innerpc = red->pc; 568 PetscFunctionReturn(0); 569 } 570 EXTERN_C_END 571 572 #undef __FUNCT__ 573 #define __FUNCT__ "PCRedundantGetPC" 574 /*@ 575 PCRedundantGetPC - Gets the sequential PC created by the redundant PC. 576 577 Not Collective 578 579 Input Parameter: 580 . pc - the preconditioner context 581 582 Output Parameter: 583 . innerpc - the sequential PC 584 585 Level: advanced 586 587 .keywords: PC, redundant solve 588 @*/ 589 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc) 590 { 591 PetscErrorCode ierr,(*f)(PC,PC*); 592 593 PetscFunctionBegin; 594 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 595 PetscValidPointer(innerpc,2); 596 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr); 597 if (f) { 598 ierr = (*f)(pc,innerpc);CHKERRQ(ierr); 599 } 600 PetscFunctionReturn(0); 601 } 602 603 EXTERN_C_BEGIN 604 #undef __FUNCT__ 605 #define __FUNCT__ "PCRedundantGetOperators_Redundant" 606 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat) 607 { 608 PC_Redundant *red = (PC_Redundant*)pc->data; 609 610 PetscFunctionBegin; 611 if (mat) *mat = red->pmats[0]; 612 if (pmat) *pmat = red->pmats[0]; 613 PetscFunctionReturn(0); 614 } 615 EXTERN_C_END 616 617 #undef __FUNCT__ 618 #define __FUNCT__ "PCRedundantGetOperators" 619 /*@ 620 PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix 621 622 Not Collective 623 624 Input Parameter: 625 . pc - the preconditioner context 626 627 Output Parameters: 628 + mat - the matrix 629 - pmat - the (possibly different) preconditioner matrix 630 631 Level: advanced 632 633 .keywords: PC, redundant solve 634 @*/ 635 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat) 636 { 637 PetscErrorCode ierr,(*f)(PC,Mat*,Mat*); 638 639 PetscFunctionBegin; 640 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 641 if (mat) PetscValidPointer(mat,2); 642 if (pmat) PetscValidPointer(pmat,3); 643 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr); 644 if (f) { 645 ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr); 646 } 647 PetscFunctionReturn(0); 648 } 649 650 /* -------------------------------------------------------------------------------------*/ 651 /*MC 652 PCREDUNDANT - Runs a preconditioner for the entire problem on each processor 653 654 655 Options for the redundant preconditioners can be set with -redundant_pc_xxx 656 657 Level: intermediate 658 659 660 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(), 661 PCRedundantGetPC(), PCRedundantGetOperators() 662 663 M*/ 664 665 EXTERN_C_BEGIN 666 #undef __FUNCT__ 667 #define __FUNCT__ "PCCreate_Redundant" 668 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc) 669 { 670 PetscErrorCode ierr; 671 PC_Redundant *red; 672 const char *prefix; 673 674 PetscInt nsubcomm,np_subcomm,nleftover,i,j,color; 675 PetscMPIInt rank,size,subrank,*subsize; 676 MPI_Comm subcomm; 677 PetscMPIInt duprank; 678 PetscMPIInt rank_dup,size_dup; 679 MPI_Comm dupcomm; 680 681 PetscFunctionBegin; 682 ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr); 683 ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr); 684 red->useparallelmat = PETSC_TRUE; 685 686 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 687 ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr); 688 nsubcomm = size; 689 ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr); 690 if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size); 691 692 /* get size of each subcommunicators */ 693 ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr); 694 np_subcomm = size/nsubcomm; 695 nleftover = size - nsubcomm*np_subcomm; 696 for (i=0; i<nsubcomm; i++){ 697 subsize[i] = np_subcomm; 698 if (i<nleftover) subsize[i]++; 699 } 700 701 /* find color for this proc */ 702 color = rank%nsubcomm; 703 subrank = rank/nsubcomm; 704 705 ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr); 706 red->subcomm = subcomm; 707 red->color = color; 708 red->nsubcomm = nsubcomm; 709 710 j = 0; duprank = 0; 711 for (i=0; i<nsubcomm; i++){ 712 if (j == color){ 713 duprank += subrank; 714 break; 715 } 716 duprank += subsize[i]; j++; 717 } 718 /* 719 ierr = PetscSynchronizedPrintf(pc->comm, "[%d] color %d, subrank %d, duprank %d\n",rank,color,subrank,duprank); 720 ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr); 721 */ 722 723 ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr); 724 ierr = MPI_Comm_rank(dupcomm,&rank_dup);CHKERRQ(ierr); 725 ierr = MPI_Comm_size(dupcomm,&size_dup);CHKERRQ(ierr); 726 /* 727 ierr = PetscSynchronizedPrintf(pc->comm, "[%d] duprank %d\n",rank,duprank); 728 ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr); 729 */ 730 red->dupcomm = dupcomm; 731 ierr = PetscFree(subsize);CHKERRQ(ierr); 732 733 /* create the sequential PC that each processor has copy of */ 734 ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr); 735 ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr); 736 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 737 ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr); 738 ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr); 739 740 pc->ops->apply = PCApply_Redundant; 741 pc->ops->applytranspose = 0; 742 pc->ops->setup = PCSetUp_Redundant; 743 pc->ops->destroy = PCDestroy_Redundant; 744 pc->ops->setfromoptions = PCSetFromOptions_Redundant; 745 pc->ops->setuponblocks = 0; 746 pc->ops->view = PCView_Redundant; 747 pc->ops->applyrichardson = 0; 748 749 pc->data = (void*)red; 750 751 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant", 752 PCRedundantSetScatter_Redundant);CHKERRQ(ierr); 753 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant", 754 PCRedundantGetPC_Redundant);CHKERRQ(ierr); 755 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant", 756 PCRedundantGetOperators_Redundant);CHKERRQ(ierr); 757 758 PetscFunctionReturn(0); 759 } 760 EXTERN_C_END 761