1 #define PETSCKSP_DLL 2 3 /* 4 This file defines a "solve the problem redundantly on each subgroup of processor" preconditioner. 5 */ 6 #include "private/pcimpl.h" /*I "petscpc.h" I*/ 7 #include "petscksp.h" 8 9 typedef struct { 10 PC pc; /* actual preconditioner used on each processor */ 11 Vec xsub,ysub; /* vectors of a subcommunicator to hold parallel vectors of pc->comm */ 12 Vec xdup,ydup; /* parallel vector that congregates xsub or ysub facilitating vector scattering */ 13 Mat *pmats; /* matrix and optional preconditioner matrix */ 14 Mat pmats_sub; /* matrix and optional preconditioner matrix */ 15 VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */ 16 PetscTruth useparallelmat; 17 MPI_Comm subcomm; /* processors belong to a subcommunicator implement a PC in parallel */ 18 MPI_Comm dupcomm; /* processors belong to pc->comm with their rank remapped in the way 19 that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */ 20 PetscInt nsubcomm; /* num of subcommunicators, which equals the num of redundant matrix systems */ 21 PetscInt color; /* color of processors in a subcommunicator */ 22 } PC_Redundant; 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "PCView_Redundant" 26 static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer) 27 { 28 PC_Redundant *red = (PC_Redundant*)pc->data; 29 PetscErrorCode ierr; 30 PetscMPIInt rank; 31 PetscTruth iascii,isstring; 32 PetscViewer sviewer; 33 34 PetscFunctionBegin; 35 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 36 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 37 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr); 38 if (iascii) { 39 ierr = PetscViewerASCIIPrintf(viewer," Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr); 40 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 41 if (!rank) { 42 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 43 ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 44 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 45 } 46 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 47 } else if (isstring) { 48 ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr); 49 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 50 if (!rank) { 51 ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 52 } 53 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 54 } else { 55 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name); 56 } 57 PetscFunctionReturn(0); 58 } 59 60 #include "src/mat/matimpl.h" /*I "petscmat.h" I*/ 61 #include "private/vecimpl.h" 62 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 63 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 64 65 #undef __FUNCT__ 66 #define __FUNCT__ "MatGetRedundantMatrix" 67 PetscErrorCode MatGetRedundantMatrix_AIJ(PC pc,Mat mat,MPI_Comm subcomm,PetscInt mlocal_sub,Mat *matredundant) 68 { 69 PetscMPIInt rank,size,subrank,subsize; 70 MPI_Comm comm=mat->comm; 71 PetscErrorCode ierr; 72 PC_Redundant *red=(PC_Redundant*)pc->data; 73 PetscInt nsubcomm=red->nsubcomm,nsends,nrecvs,i,prid=100,itmp; 74 PetscMPIInt *send_rank,*recv_rank; 75 PetscInt *rowrange=pc->pmat->rmap.range,nzlocal; 76 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 77 Mat A=aij->A,B=aij->B; 78 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 79 Mat C; 80 81 PetscInt nleftover,np_subcomm,j; 82 PetscInt nz_A,nz_B,*sbuf_j; 83 PetscScalar *sbuf_a; 84 PetscInt cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 85 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 86 PetscInt *cols,ctmp,lwrite,*rptr,l; 87 PetscScalar *vals,*aworkA,*aworkB; 88 89 PetscMPIInt tag1,tag2,tag3,imdex; 90 MPI_Request *s_waits1,*s_waits2,*s_waits3,*r_waits1,*r_waits2,*r_waits3; 91 MPI_Status recv_status,*send_status; 92 PetscInt *sbuf_nz,*rbuf_nz,count; 93 94 PetscInt **rbuf_j; 95 PetscScalar **rbuf_a; 96 97 PetscFunctionBegin; 98 ierr = PetscOptionsGetInt(PETSC_NULL,"-prid",&prid,PETSC_NULL);CHKERRQ(ierr); 99 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 100 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 101 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 102 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 103 /* 104 ierr = PetscSynchronizedPrintf(comm, "[%d] subrank %d, subsize %d\n",rank,subrank,subsize); 105 ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 106 */ 107 /* get the destination processors */ 108 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 109 recv_rank = send_rank + size; 110 np_subcomm = size/nsubcomm; 111 nleftover = size - nsubcomm*np_subcomm; 112 nsends = 0; nrecvs = 0; 113 for (i=0; i<size; i++){ /* i=rank*/ 114 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 115 send_rank[nsends] = i; nsends++; 116 recv_rank[nrecvs++] = i; 117 } 118 } 119 if (rank >= size - nleftover){/* this proc is a leftover processor */ 120 i = size-nleftover-1; 121 j = 0; 122 while (j < nsubcomm - nleftover){ 123 send_rank[nsends++] = i; 124 i--; j++; 125 } 126 } 127 128 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 129 for (i=0; i<nleftover; i++){ 130 recv_rank[nrecvs++] = size-nleftover+i; 131 } 132 } 133 if (prid == rank){ 134 printf("[%d] sends to ",rank); 135 for (i=0; i<nsends; i++) printf(" [%d],",send_rank[i]); 136 printf(" \n"); 137 } 138 /* 139 ierr = PetscSynchronizedPrintf(comm, "[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs); 140 ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 141 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 142 */ 143 144 /* get this processor's nzlocal=nz_A+nz_B */ 145 nz_A = a->nz; nz_B = b->nz; 146 nzlocal = nz_A + nz_B; 147 148 /* allocate sbuf_j, sbuf_a, then copy mat's local entries into the buffers */ 149 itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 150 ierr = PetscMalloc(itmp*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 151 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 152 153 rptr = sbuf_j; 154 cols = sbuf_j + rend-rstart + 1; 155 vals = sbuf_a; 156 rptr[0] = 0; 157 for (i=0; i<rend-rstart; i++){ 158 row = i + rstart; 159 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 160 ncols = nzA + nzB; 161 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 162 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 163 /* load the column indices for this row into cols */ 164 lwrite = 0; 165 for (l=0; l<nzB; l++) { 166 if ((ctmp = bmap[cworkB[l]]) < cstart){ 167 vals[lwrite] = aworkB[l]; 168 cols[lwrite++] = ctmp; 169 } 170 } 171 for (l=0; l<nzA; l++){ 172 vals[lwrite] = aworkA[l]; 173 cols[lwrite++] = cstart + cworkA[l]; 174 } 175 for (l=0; l<nzB; l++) { 176 if ((ctmp = bmap[cworkB[l]]) >= cend){ 177 vals[lwrite] = aworkB[l]; 178 cols[lwrite++] = ctmp; 179 } 180 } 181 /* insert local matrix values into C */ 182 /* ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); */ 183 184 vals += ncols; 185 cols += ncols; 186 rptr[i+1] = rptr[i] + ncols; 187 } 188 /* 189 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 190 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 191 */ 192 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 193 194 /* send nzlocal to others, and recv other's nzlocal */ 195 /*--------------------------------------------------*/ 196 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 197 rbuf_nz = sbuf_nz + nsends; 198 199 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits1,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 200 s_waits2 = s_waits1 + nsends; 201 s_waits3 = s_waits2 + nsends; 202 r_waits1 = s_waits3 + nsends; 203 r_waits2 = r_waits1 + nrecvs; 204 r_waits3 = r_waits2 + nrecvs; 205 206 /* get some new tags to keep the communication clean */ 207 ierr = PetscObjectGetNewTag((PetscObject)A,&tag1);CHKERRQ(ierr); 208 ierr = PetscObjectGetNewTag((PetscObject)A,&tag2);CHKERRQ(ierr); 209 ierr = PetscObjectGetNewTag((PetscObject)A,&tag3);CHKERRQ(ierr); 210 211 /* post receives of other's nzlocal */ 212 for (i=0; i<nrecvs; i++){ 213 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 214 } 215 216 /* send nzlocal to others */ 217 for (i=0; i<nsends; i++){ 218 sbuf_nz[i] = nzlocal; 219 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 220 if (prid == rank) printf(" [%d] sends nz %d to [%d]\n",rank,nzlocal,send_rank[i]); 221 } 222 223 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 224 count = nrecvs; 225 while (count) { 226 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 227 recv_rank[imdex] = recv_status.MPI_SOURCE; 228 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_a and rbuf_j */ 229 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 230 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_status.MPI_SOURCE,tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 231 232 itmp = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 233 rbuf_nz[imdex] += itmp+2; 234 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 235 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 236 count--; 237 } 238 239 /* wait on sends of nzlocal */ 240 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 241 242 /* send mat->i,j and mat->a to others, and recv from other's */ 243 /*-----------------------------------------------------------*/ 244 for (i=0; i<nsends; i++){ 245 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 246 itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 247 ierr = MPI_Isend(sbuf_j,itmp,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 248 } 249 250 /* wait on receives of mat->i,j and mat->a */ 251 /*-----------------------------------------*/ 252 count = nrecvs; 253 while (count) { 254 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 255 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 256 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 257 count--; 258 } 259 260 /* wait on sends of mat->i,j and mat->a */ 261 /*--------------------------------------*/ 262 if (nsends) { 263 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 264 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 265 } 266 ierr = PetscFree(sbuf_nz);CHKERRQ(ierr); 267 ierr = PetscFree2(s_waits1,send_status);CHKERRQ(ierr); 268 269 /* create redundant matrix */ 270 /*-------------------------*/ 271 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 272 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 273 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 274 275 /* insert local matrix entries */ 276 rptr = sbuf_j; 277 cols = sbuf_j + rend-rstart + 1; 278 vals = sbuf_a; 279 for (i=0; i<rend-rstart; i++){ 280 row = i + rstart; 281 ncols = rptr[i+1] - rptr[i]; 282 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 283 vals += ncols; 284 cols += ncols; 285 } 286 /* insert received matrix entries */ 287 for (imdex=0; imdex<nrecvs; imdex++){ 288 289 rstart = rowrange[recv_rank[imdex]]; 290 rend = rowrange[recv_rank[imdex]+1]; 291 rptr = rbuf_j[imdex]; 292 cols = rbuf_j[imdex] + rend-rstart + 1; 293 vals = rbuf_a[imdex]; 294 for (i=0; i<rend-rstart; i++){ 295 row = i + rstart; 296 ncols = rptr[i+1] - rptr[i]; 297 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 298 vals += ncols; 299 cols += ncols; 300 } 301 } 302 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 303 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 304 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 305 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 306 *matredundant = C; 307 308 /* free space */ 309 ierr = PetscFree(send_rank);CHKERRQ(ierr); 310 ierr = PetscFree(sbuf_j);CHKERRQ(ierr); 311 ierr = PetscFree(sbuf_a);CHKERRQ(ierr); 312 for (i=0; i<nrecvs; i++){ 313 ierr = PetscFree(rbuf_j[i]);CHKERRQ(ierr); 314 ierr = PetscFree(rbuf_a[i]);CHKERRQ(ierr); 315 } 316 ierr = PetscFree3(sbuf_nz,rbuf_j,rbuf_a);CHKERRQ(ierr); 317 PetscFunctionReturn(0); 318 } 319 320 #undef __FUNCT__ 321 #define __FUNCT__ "PCSetUp_Redundant" 322 static PetscErrorCode PCSetUp_Redundant(PC pc) 323 { 324 PC_Redundant *red = (PC_Redundant*)pc->data; 325 PetscErrorCode ierr; 326 PetscInt mstart,mend,mlocal,m; 327 PetscMPIInt size; 328 IS isl; 329 MatReuse reuse = MAT_INITIAL_MATRIX; 330 MatStructure str = DIFFERENT_NONZERO_PATTERN; 331 MPI_Comm comm; 332 Vec vec; 333 334 PetscInt mlocal_sub; 335 PetscMPIInt subsize,subrank; 336 PetscInt rstart_sub,rend_sub,mloc_sub; 337 Mat Aredundant; 338 339 PetscFunctionBegin; 340 ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr); 341 ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr); 342 ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 343 ierr = VecGetSize(vec,&m);CHKERRQ(ierr); 344 if (!pc->setupcalled) { 345 /* create working vectors xsub/ysub and xdup/ydup */ 346 ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr); 347 ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr); 348 349 /* get local size of xsub/ysub */ 350 ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr); 351 ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr); 352 rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */ 353 if (subrank+1 < subsize){ 354 rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)]; 355 } else { 356 rend_sub = m; 357 } 358 mloc_sub = rend_sub - rstart_sub; 359 ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr); 360 /* create xsub with empty local arrays, because xdup's arrays will be placed into it */ 361 ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr); 362 363 /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it. 364 Note: we use communicator dupcomm, not pc->comm! */ 365 ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr); 366 ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr); 367 368 /* create vec scatters */ 369 if (!red->scatterin){ 370 IS is1,is2; 371 PetscInt *idx1,*idx2,i,j,k; 372 ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr); 373 idx2 = idx1 + red->nsubcomm*mlocal; 374 j = 0; 375 for (k=0; k<red->nsubcomm; k++){ 376 for (i=mstart; i<mend; i++){ 377 idx1[j] = i; 378 idx2[j++] = i + m*k; 379 } 380 } 381 ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr); 382 ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr); 383 ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr); 384 ierr = ISDestroy(is1);CHKERRQ(ierr); 385 ierr = ISDestroy(is2);CHKERRQ(ierr); 386 387 ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr); 388 ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr); 389 ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr); 390 ierr = ISDestroy(is1);CHKERRQ(ierr); 391 ierr = ISDestroy(is2);CHKERRQ(ierr); 392 ierr = PetscFree(idx1);CHKERRQ(ierr); 393 } 394 } 395 ierr = VecDestroy(vec);CHKERRQ(ierr); 396 397 /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */ 398 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 399 if (size == 1) { 400 red->useparallelmat = PETSC_FALSE; 401 } 402 403 if (red->useparallelmat) { 404 if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) { 405 /* destroy old matrices */ 406 if (red->pmats) { 407 ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr); 408 } 409 } else if (pc->setupcalled == 1) { 410 reuse = MAT_REUSE_MATRIX; 411 str = SAME_NONZERO_PATTERN; 412 } 413 414 /* ================== matrix ============= */ 415 ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr); 416 ierr = MatGetRedundantMatrix_AIJ(pc,pc->pmat,red->subcomm,mlocal_sub,&Aredundant);CHKERRQ(ierr); 417 418 /* grab the parallel matrix and put it into processors of a subcomminicator */ 419 ierr = ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);CHKERRQ(ierr); 420 ierr = MatGetSubMatrices(pc->pmat,1,&isl,&isl,reuse,&red->pmats);CHKERRQ(ierr); 421 ierr = ISDestroy(isl);CHKERRQ(ierr); 422 423 /* ------- temporarily set a mpi matrix pmats_sub- provided by user! --*/ 424 ierr = MatCreate(red->subcomm,&red->pmats_sub);CHKERRQ(ierr); 425 ierr = MatSetSizes(red->pmats_sub,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 426 ierr = MatSetFromOptions(red->pmats_sub);CHKERRQ(ierr); 427 { 428 PetscInt Istart,Iend,ncols,i; 429 const PetscInt *cols; 430 const PetscScalar *vals; 431 PetscTruth flg; 432 ierr = MatGetOwnershipRange(red->pmats_sub,&Istart,&Iend);CHKERRQ(ierr); 433 for (i=Istart; i<Iend; i++) { 434 ierr = MatGetRow(red->pmats[0],i,&ncols,&cols,&vals);CHKERRQ(ierr); 435 ierr = MatSetValues(red->pmats_sub,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 436 ierr = MatRestoreRow(red->pmats[0],i,&ncols,&cols,&vals);CHKERRQ(ierr); 437 } 438 ierr = MatAssemblyBegin(red->pmats_sub,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 439 ierr = MatAssemblyEnd(red->pmats_sub,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 440 441 ierr = MatEqual(red->pmats_sub,Aredundant,&flg);CHKERRQ(ierr); 442 if (!flg) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"pmats_sub !=Aredundant "); 443 ierr = MatDestroy(red->pmats_sub); 444 } 445 red->pmats_sub = Aredundant; 446 447 /* tell PC of the subcommunicator its operators */ 448 ierr = PCSetOperators(red->pc,red->pmats_sub,red->pmats_sub,str);CHKERRQ(ierr); 449 } else { 450 ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr); 451 } 452 ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 453 ierr = PCSetUp(red->pc);CHKERRQ(ierr); 454 PetscFunctionReturn(0); 455 } 456 457 #undef __FUNCT__ 458 #define __FUNCT__ "PCApply_Redundant" 459 static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y) 460 { 461 PC_Redundant *red = (PC_Redundant*)pc->data; 462 PetscErrorCode ierr; 463 PetscScalar *array; 464 465 PetscFunctionBegin; 466 /* scatter x to xdup */ 467 ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 468 ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 469 470 /* place xdup's local array into xsub */ 471 ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr); 472 ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr); 473 474 /* apply preconditioner on each processor */ 475 ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr); 476 ierr = VecResetArray(red->xsub);CHKERRQ(ierr); 477 ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr); 478 479 /* place ysub's local array into ydup */ 480 ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr); 481 ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr); 482 483 /* scatter ydup to y */ 484 ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 485 ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 486 ierr = VecResetArray(red->ydup);CHKERRQ(ierr); 487 ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 #undef __FUNCT__ 492 #define __FUNCT__ "PCDestroy_Redundant" 493 static PetscErrorCode PCDestroy_Redundant(PC pc) 494 { 495 PC_Redundant *red = (PC_Redundant*)pc->data; 496 PetscErrorCode ierr; 497 498 PetscFunctionBegin; 499 if (red->scatterin) {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);} 500 if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);} 501 if (red->ysub) {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);} 502 if (red->xsub) {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);} 503 if (red->xdup) {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);} 504 if (red->ydup) {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);} 505 if (red->pmats_sub) { 506 ierr = MatDestroy(red->pmats_sub);CHKERRQ(ierr); 507 } 508 509 if (red->pmats) { 510 ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr); 511 } 512 ierr = PCDestroy(red->pc);CHKERRQ(ierr); 513 ierr = PetscFree(red);CHKERRQ(ierr); 514 PetscFunctionReturn(0); 515 } 516 517 #undef __FUNCT__ 518 #define __FUNCT__ "PCSetFromOptions_Redundant" 519 static PetscErrorCode PCSetFromOptions_Redundant(PC pc) 520 { 521 PetscFunctionBegin; 522 PetscFunctionReturn(0); 523 } 524 525 EXTERN_C_BEGIN 526 #undef __FUNCT__ 527 #define __FUNCT__ "PCRedundantSetScatter_Redundant" 528 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out) 529 { 530 PC_Redundant *red = (PC_Redundant*)pc->data; 531 PetscErrorCode ierr; 532 533 PetscFunctionBegin; 534 red->scatterin = in; 535 red->scatterout = out; 536 ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr); 537 ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr); 538 PetscFunctionReturn(0); 539 } 540 EXTERN_C_END 541 542 #undef __FUNCT__ 543 #define __FUNCT__ "PCRedundantSetScatter" 544 /*@ 545 PCRedundantSetScatter - Sets the scatter used to copy values into the 546 redundant local solve and the scatter to move them back into the global 547 vector. 548 549 Collective on PC 550 551 Input Parameters: 552 + pc - the preconditioner context 553 . in - the scatter to move the values in 554 - out - the scatter to move them out 555 556 Level: advanced 557 558 .keywords: PC, redundant solve 559 @*/ 560 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out) 561 { 562 PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter); 563 564 PetscFunctionBegin; 565 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 566 PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2); 567 PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3); 568 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr); 569 if (f) { 570 ierr = (*f)(pc,in,out);CHKERRQ(ierr); 571 } 572 PetscFunctionReturn(0); 573 } 574 575 EXTERN_C_BEGIN 576 #undef __FUNCT__ 577 #define __FUNCT__ "PCRedundantGetPC_Redundant" 578 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc) 579 { 580 PC_Redundant *red = (PC_Redundant*)pc->data; 581 582 PetscFunctionBegin; 583 *innerpc = red->pc; 584 PetscFunctionReturn(0); 585 } 586 EXTERN_C_END 587 588 #undef __FUNCT__ 589 #define __FUNCT__ "PCRedundantGetPC" 590 /*@ 591 PCRedundantGetPC - Gets the sequential PC created by the redundant PC. 592 593 Not Collective 594 595 Input Parameter: 596 . pc - the preconditioner context 597 598 Output Parameter: 599 . innerpc - the sequential PC 600 601 Level: advanced 602 603 .keywords: PC, redundant solve 604 @*/ 605 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc) 606 { 607 PetscErrorCode ierr,(*f)(PC,PC*); 608 609 PetscFunctionBegin; 610 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 611 PetscValidPointer(innerpc,2); 612 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr); 613 if (f) { 614 ierr = (*f)(pc,innerpc);CHKERRQ(ierr); 615 } 616 PetscFunctionReturn(0); 617 } 618 619 EXTERN_C_BEGIN 620 #undef __FUNCT__ 621 #define __FUNCT__ "PCRedundantGetOperators_Redundant" 622 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat) 623 { 624 PC_Redundant *red = (PC_Redundant*)pc->data; 625 626 PetscFunctionBegin; 627 if (mat) *mat = red->pmats[0]; 628 if (pmat) *pmat = red->pmats[0]; 629 PetscFunctionReturn(0); 630 } 631 EXTERN_C_END 632 633 #undef __FUNCT__ 634 #define __FUNCT__ "PCRedundantGetOperators" 635 /*@ 636 PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix 637 638 Not Collective 639 640 Input Parameter: 641 . pc - the preconditioner context 642 643 Output Parameters: 644 + mat - the matrix 645 - pmat - the (possibly different) preconditioner matrix 646 647 Level: advanced 648 649 .keywords: PC, redundant solve 650 @*/ 651 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat) 652 { 653 PetscErrorCode ierr,(*f)(PC,Mat*,Mat*); 654 655 PetscFunctionBegin; 656 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 657 if (mat) PetscValidPointer(mat,2); 658 if (pmat) PetscValidPointer(pmat,3); 659 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr); 660 if (f) { 661 ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr); 662 } 663 PetscFunctionReturn(0); 664 } 665 666 /* -------------------------------------------------------------------------------------*/ 667 /*MC 668 PCREDUNDANT - Runs a preconditioner for the entire problem on each processor 669 670 671 Options for the redundant preconditioners can be set with -redundant_pc_xxx 672 673 Level: intermediate 674 675 676 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(), 677 PCRedundantGetPC(), PCRedundantGetOperators() 678 679 M*/ 680 681 EXTERN_C_BEGIN 682 #undef __FUNCT__ 683 #define __FUNCT__ "PCCreate_Redundant" 684 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc) 685 { 686 PetscErrorCode ierr; 687 PC_Redundant *red; 688 const char *prefix; 689 690 PetscInt nsubcomm,np_subcomm,nleftover,i,j,color; 691 PetscMPIInt rank,size,subrank,*subsize; 692 MPI_Comm subcomm; 693 PetscMPIInt duprank; 694 PetscMPIInt rank_dup,size_dup; 695 MPI_Comm dupcomm; 696 697 PetscFunctionBegin; 698 ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr); 699 ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr); 700 red->useparallelmat = PETSC_TRUE; 701 702 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 703 ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr); 704 nsubcomm = size; 705 ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr); 706 if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size); 707 708 /* get size of each subcommunicators */ 709 ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr); 710 np_subcomm = size/nsubcomm; 711 nleftover = size - nsubcomm*np_subcomm; 712 for (i=0; i<nsubcomm; i++){ 713 subsize[i] = np_subcomm; 714 if (i<nleftover) subsize[i]++; 715 } 716 717 /* find color for this proc */ 718 color = rank%nsubcomm; 719 subrank = rank/nsubcomm; 720 721 ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr); 722 red->subcomm = subcomm; 723 red->color = color; 724 red->nsubcomm = nsubcomm; 725 726 j = 0; duprank = 0; 727 for (i=0; i<nsubcomm; i++){ 728 if (j == color){ 729 duprank += subrank; 730 break; 731 } 732 duprank += subsize[i]; j++; 733 } 734 /* 735 ierr = PetscSynchronizedPrintf(pc->comm, "[%d] color %d, subrank %d, duprank %d\n",rank,color,subrank,duprank); 736 ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr); 737 */ 738 739 ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr); 740 ierr = MPI_Comm_rank(dupcomm,&rank_dup);CHKERRQ(ierr); 741 ierr = MPI_Comm_size(dupcomm,&size_dup);CHKERRQ(ierr); 742 /* 743 ierr = PetscSynchronizedPrintf(pc->comm, "[%d] duprank %d\n",rank,duprank); 744 ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr); 745 */ 746 red->dupcomm = dupcomm; 747 ierr = PetscFree(subsize);CHKERRQ(ierr); 748 749 /* create the sequential PC that each processor has copy of */ 750 ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr); 751 ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr); 752 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 753 ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr); 754 ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr); 755 756 pc->ops->apply = PCApply_Redundant; 757 pc->ops->applytranspose = 0; 758 pc->ops->setup = PCSetUp_Redundant; 759 pc->ops->destroy = PCDestroy_Redundant; 760 pc->ops->setfromoptions = PCSetFromOptions_Redundant; 761 pc->ops->setuponblocks = 0; 762 pc->ops->view = PCView_Redundant; 763 pc->ops->applyrichardson = 0; 764 765 pc->data = (void*)red; 766 767 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant", 768 PCRedundantSetScatter_Redundant);CHKERRQ(ierr); 769 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant", 770 PCRedundantGetPC_Redundant);CHKERRQ(ierr); 771 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant", 772 PCRedundantGetOperators_Redundant);CHKERRQ(ierr); 773 774 PetscFunctionReturn(0); 775 } 776 EXTERN_C_END 777