1 #define PETSCKSP_DLL 2 3 /* 4 This file defines a "solve the problem redundantly on each subgroup of processor" preconditioner. 5 */ 6 #include "private/pcimpl.h" /*I "petscpc.h" I*/ 7 #include "petscksp.h" 8 9 #undef CONTIGUOUS_COLOR 10 #define INTER_COLOR 11 12 typedef struct { 13 PC pc; /* actual preconditioner used on each processor */ 14 Vec xsub,ysub; /* vectors of a subcommunicator to hold parallel vectors of pc->comm */ 15 Vec xdup,ydup; /* parallel vector that congregates xsub or ysub facilitating vector scattering */ 16 Mat *pmats; /* matrix and optional preconditioner matrix */ 17 Mat pmats_sub; /* matrix and optional preconditioner matrix */ 18 VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */ 19 PetscTruth useparallelmat; 20 MPI_Comm subcomm; /* processors belong to a subcommunicator implement a PC in parallel */ 21 MPI_Comm dupcomm; /* processors belong to pc->comm with their rank remapped in the way 22 that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */ 23 PetscInt nsubcomm; /* num of subcommunicators, which equals the num of redundant matrix systems */ 24 PetscInt color; /* color of processors in a subcommunicator */ 25 } PC_Redundant; 26 27 #undef __FUNCT__ 28 #define __FUNCT__ "PCView_Redundant" 29 static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer) 30 { 31 PC_Redundant *red = (PC_Redundant*)pc->data; 32 PetscErrorCode ierr; 33 PetscMPIInt rank; 34 PetscTruth iascii,isstring; 35 PetscViewer sviewer; 36 37 PetscFunctionBegin; 38 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 39 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 40 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr); 41 if (iascii) { 42 ierr = PetscViewerASCIIPrintf(viewer," Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr); 43 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 44 if (!rank) { 45 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 46 ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 47 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 48 } 49 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 50 } else if (isstring) { 51 ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr); 52 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 53 if (!rank) { 54 ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 55 } 56 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 57 } else { 58 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name); 59 } 60 PetscFunctionReturn(0); 61 } 62 63 #include "src/mat/matimpl.h" /*I "petscmat.h" I*/ 64 #include "private/vecimpl.h" 65 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 66 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 67 68 #undef __FUNCT__ 69 #define __FUNCT__ "MatGetRedundantMatrix" 70 PetscErrorCode MatGetRedundantMatrix_AIJ(PC pc,Mat mat,MPI_Comm subcomm,PetscInt mlocal_sub,Mat *matredundant) 71 { 72 PetscMPIInt rank,size,subrank,subsize; 73 MPI_Comm comm=mat->comm; 74 PetscErrorCode ierr; 75 PC_Redundant *red=(PC_Redundant*)pc->data; 76 PetscInt nsubcomm=red->nsubcomm,nsends,nrecvs,i,prid=100,itmp; 77 PetscMPIInt *send_rank,*recv_rank; 78 PetscInt *rowrange=pc->pmat->rmap.range,mlocal_max,nzlocal; 79 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 80 Mat A=aij->A,B=aij->B; 81 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 82 Mat C; 83 84 PetscInt nleftover,np_subcomm,j; 85 PetscInt nz_A,nz_B,*sbuf_j; 86 PetscScalar *sbuf_a; 87 PetscInt cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 88 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 89 PetscInt *cols,ctmp,lwrite,*rptr,l; 90 PetscScalar *vals,*aworkA,*aworkB; 91 92 PetscMPIInt tag1,tag2,tag3,imdex; 93 MPI_Request *s_waits1,*s_waits2,*s_waits3,*r_waits1,*r_waits2,*r_waits3; 94 MPI_Status recv_status,*send_status; 95 PetscInt *sbuf_nz,*rbuf_nz,count; 96 97 PetscInt **rbuf_j; 98 PetscScalar **rbuf_a; 99 100 PetscFunctionBegin; 101 ierr = PetscOptionsGetInt(PETSC_NULL,"-prid",&prid,PETSC_NULL);CHKERRQ(ierr); 102 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 103 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 104 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 105 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 106 /* 107 ierr = PetscSynchronizedPrintf(comm, "[%d] subrank %d, subsize %d\n",rank,subrank,subsize); 108 ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 109 */ 110 /* get the destination processors */ 111 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 112 recv_rank = send_rank + size; 113 np_subcomm = size/nsubcomm; 114 nleftover = size - nsubcomm*np_subcomm; 115 nsends = 0; nrecvs = 0; 116 for (i=0; i<size; i++){ /* i=rank*/ 117 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 118 send_rank[nsends] = i; nsends++; 119 recv_rank[nrecvs++] = i; 120 } 121 } 122 if (rank >= size - nleftover){/* this proc is a leftover processor */ 123 i = size-nleftover-1; 124 j = 0; 125 while (j < nsubcomm - nleftover){ 126 send_rank[nsends++] = i; 127 i--; j++; 128 } 129 } 130 131 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 132 for (i=0; i<nleftover; i++){ 133 recv_rank[nrecvs++] = size-nleftover+i; 134 } 135 } 136 if (prid == rank){ 137 printf("[%d] sends to ",rank); 138 for (i=0; i<nsends; i++) printf(" [%d],",send_rank[i]); 139 printf(" \n"); 140 } 141 /* 142 ierr = PetscSynchronizedPrintf(comm, "[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs); 143 ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 144 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 145 */ 146 147 /* get this processor's nzlocal=nz_A+nz_B */ 148 nz_A = a->nz; nz_B = b->nz; 149 nzlocal = nz_A + nz_B; 150 151 /* allocate sbuf_j, sbuf_a, then copy mat's local entries into the buffers */ 152 itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 153 ierr = PetscMalloc(itmp*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 154 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 155 156 rptr = sbuf_j; 157 cols = sbuf_j + rend-rstart + 1; 158 vals = sbuf_a; 159 rptr[0] = 0; 160 for (i=0; i<rend-rstart; i++){ 161 row = i + rstart; 162 if (rank == prid) printf(" \n row %d: ",row); 163 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 164 ncols = nzA + nzB; 165 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 166 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 167 /* load the column indices for this row into cols */ 168 lwrite = 0; 169 for (l=0; l<nzB; l++) { 170 if ((ctmp = bmap[cworkB[l]]) < cstart){ 171 vals[lwrite] = aworkB[l]; 172 cols[lwrite++] = ctmp; 173 if (rank == prid) printf(" (%d,%g)",ctmp,aworkB[l]); 174 } 175 } 176 for (l=0; l<nzA; l++){ 177 vals[lwrite] = aworkA[l]; 178 cols[lwrite++] = cstart + cworkA[l]; 179 if (rank == prid) printf(" (%d,%g)",cstart + cworkA[l],aworkA[l]); 180 } 181 for (l=0; l<nzB; l++) { 182 if ((ctmp = bmap[cworkB[l]]) >= cend){ 183 vals[lwrite] = aworkB[l]; 184 cols[lwrite++] = ctmp; 185 if (rank == prid) printf(" (%d,%g)",ctmp,aworkB[l]); 186 } 187 } 188 /* insert local matrix values into C */ 189 //ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 190 191 vals += ncols; 192 cols += ncols; 193 rptr[i+1] = rptr[i] + ncols; 194 } 195 /* 196 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 197 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 198 */ 199 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 200 201 /* send nzlocal to others, and recv other's nzlocal */ 202 /*--------------------------------------------------*/ 203 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 204 rbuf_nz = sbuf_nz + nsends; 205 206 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits1,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 207 s_waits2 = s_waits1 + nsends; 208 s_waits3 = s_waits2 + nsends; 209 r_waits1 = s_waits3 + nsends; 210 r_waits2 = r_waits1 + nrecvs; 211 r_waits3 = r_waits2 + nrecvs; 212 213 /* get some new tags to keep the communication clean */ 214 ierr = PetscObjectGetNewTag((PetscObject)A,&tag1);CHKERRQ(ierr); 215 ierr = PetscObjectGetNewTag((PetscObject)A,&tag2);CHKERRQ(ierr); 216 ierr = PetscObjectGetNewTag((PetscObject)A,&tag3);CHKERRQ(ierr); 217 218 /* post receives of other's nzlocal */ 219 for (i=0; i<nrecvs; i++){ 220 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 221 } 222 223 /* send nzlocal to others */ 224 for (i=0; i<nsends; i++){ 225 sbuf_nz[i] = nzlocal; 226 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 227 if (prid == rank) printf(" [%d] sends nz %d to [%d]\n",rank,nzlocal,send_rank[i]); 228 } 229 230 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 231 count = nrecvs; 232 while (count) { 233 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 234 recv_rank[imdex] = recv_status.MPI_SOURCE; 235 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_a and rbuf_j */ 236 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 237 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_status.MPI_SOURCE,tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 238 239 itmp = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 240 rbuf_nz[imdex] += itmp+2; 241 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 242 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 243 count--; 244 } 245 246 /* wait on sends of nzlocal */ 247 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 248 249 /* send mat->i,j and mat->a to others, and recv from other's */ 250 /*-----------------------------------------------------------*/ 251 for (i=0; i<nsends; i++){ 252 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 253 itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 254 ierr = MPI_Isend(sbuf_j,itmp,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 255 } 256 257 /* wait on receives of mat->i,j and mat->a */ 258 /*-----------------------------------------*/ 259 count = nrecvs; 260 while (count) { 261 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 262 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 263 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 264 count--; 265 } 266 267 /* wait on sends of mat->i,j and mat->a */ 268 /*--------------------------------------*/ 269 if (nsends) { 270 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 271 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 272 } 273 ierr = PetscFree(sbuf_nz);CHKERRQ(ierr); 274 ierr = PetscFree2(s_waits1,send_status);CHKERRQ(ierr); 275 276 /* create redundant matrix */ 277 /*-------------------------*/ 278 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 279 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 280 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 281 282 /* insert local matrix entries */ 283 rptr = sbuf_j; 284 cols = sbuf_j + rend-rstart + 1; 285 vals = sbuf_a; 286 for (i=0; i<rend-rstart; i++){ 287 row = i + rstart; 288 ncols = rptr[i+1] - rptr[i]; 289 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 290 vals += ncols; 291 cols += ncols; 292 } 293 /* insert received matrix entries */ 294 for (imdex=0; imdex<nrecvs; imdex++){ 295 296 rstart = rowrange[recv_rank[imdex]]; 297 rend = rowrange[recv_rank[imdex]+1]; 298 rptr = rbuf_j[imdex]; 299 cols = rbuf_j[imdex] + rend-rstart + 1; 300 vals = rbuf_a[imdex]; 301 for (i=0; i<rend-rstart; i++){ 302 row = i + rstart; 303 ncols = rptr[i+1] - rptr[i]; 304 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 305 vals += ncols; 306 cols += ncols; 307 } 308 } 309 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 310 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 311 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 312 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 313 *matredundant = C; 314 315 /* free space */ 316 ierr = PetscFree(send_rank);CHKERRQ(ierr); 317 ierr = PetscFree(sbuf_j);CHKERRQ(ierr); 318 ierr = PetscFree(sbuf_a);CHKERRQ(ierr); 319 for (i=0; i<nrecvs; i++){ 320 ierr = PetscFree(rbuf_j[i]);CHKERRQ(ierr); 321 ierr = PetscFree(rbuf_a[i]);CHKERRQ(ierr); 322 } 323 ierr = PetscFree3(sbuf_nz,rbuf_j,rbuf_a);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "PCSetUp_Redundant" 329 static PetscErrorCode PCSetUp_Redundant(PC pc) 330 { 331 PC_Redundant *red = (PC_Redundant*)pc->data; 332 PetscErrorCode ierr; 333 PetscInt mstart,mend,mlocal,m; 334 PetscMPIInt size; 335 IS isl; 336 MatReuse reuse = MAT_INITIAL_MATRIX; 337 MatStructure str = DIFFERENT_NONZERO_PATTERN; 338 MPI_Comm comm; 339 Vec vec; 340 341 PetscMPIInt rank,size_sub,itmp; 342 PetscInt mlocal_sub; 343 PetscMPIInt subsize,subrank; 344 PetscInt rstart_sub,rend_sub,mloc_sub; 345 Mat Aredundant; 346 347 PetscFunctionBegin; 348 ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr); 349 ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr); 350 ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 351 ierr = VecGetSize(vec,&m);CHKERRQ(ierr); 352 if (!pc->setupcalled) { 353 /* create working vectors xsub/ysub and xdup/ydup */ 354 ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr); 355 ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr); 356 357 #ifdef INTER_COLOR 358 /* get local size of xsub/ysub */ 359 ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr); 360 ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr); 361 rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */ 362 if (subrank+1 < subsize){ 363 rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)]; 364 } else { 365 rend_sub = m; 366 } 367 mloc_sub = rend_sub - rstart_sub; 368 ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr); 369 /* create xsub with empty local arrays, because xdup's arrays will be placed into it */ 370 ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr); 371 #endif 372 #ifdef CONTIGUOUS_COLOR 373 ierr = VecCreateMPI(red->subcomm,PETSC_DECIDE,m,&red->ysub);CHKERRQ(ierr); 374 ierr = VecGetLocalSize(red->ysub,&mloc_sub);CHKERRQ(ierr); 375 ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,m,PETSC_NULL,&red->xsub);CHKERRQ(ierr); 376 #endif 377 378 /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it. 379 Note: we use communicator dupcomm, not pc->comm! */ 380 ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr); 381 ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr); 382 383 /* create vec scatters */ 384 if (!red->scatterin){ 385 IS is1,is2; 386 PetscInt *idx1,*idx2,i,j,k; 387 ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr); 388 idx2 = idx1 + red->nsubcomm*mlocal; 389 j = 0; 390 for (k=0; k<red->nsubcomm; k++){ 391 for (i=mstart; i<mend; i++){ 392 idx1[j] = i; 393 idx2[j++] = i + m*k; 394 } 395 } 396 ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr); 397 ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr); 398 ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr); 399 ierr = ISDestroy(is1);CHKERRQ(ierr); 400 ierr = ISDestroy(is2);CHKERRQ(ierr); 401 402 ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr); 403 ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr); 404 ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr); 405 ierr = ISDestroy(is1);CHKERRQ(ierr); 406 ierr = ISDestroy(is2);CHKERRQ(ierr); 407 ierr = PetscFree(idx1);CHKERRQ(ierr); 408 } 409 } 410 ierr = VecDestroy(vec);CHKERRQ(ierr); 411 412 /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */ 413 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 414 if (size == 1) { 415 red->useparallelmat = PETSC_FALSE; 416 } 417 418 if (red->useparallelmat) { 419 if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) { 420 /* destroy old matrices */ 421 if (red->pmats) { 422 ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr); 423 } 424 } else if (pc->setupcalled == 1) { 425 reuse = MAT_REUSE_MATRIX; 426 str = SAME_NONZERO_PATTERN; 427 } 428 429 /* ================== matrix ============= */ 430 ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr); 431 ierr = MatGetRedundantMatrix_AIJ(pc,pc->pmat,red->subcomm,mlocal_sub,&Aredundant);CHKERRQ(ierr); 432 433 /* grab the parallel matrix and put it into processors of a subcomminicator */ 434 ierr = ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);CHKERRQ(ierr); 435 ierr = MatGetSubMatrices(pc->pmat,1,&isl,&isl,reuse,&red->pmats);CHKERRQ(ierr); 436 ierr = ISDestroy(isl);CHKERRQ(ierr); 437 438 /* ------- temporarily set a mpi matrix pmats_sub- provided by user! --*/ 439 ierr = MatCreate(red->subcomm,&red->pmats_sub);CHKERRQ(ierr); 440 ierr = MatSetSizes(red->pmats_sub,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 441 ierr = MatSetFromOptions(red->pmats_sub);CHKERRQ(ierr); 442 { 443 PetscInt Istart,Iend,ncols,i; 444 const PetscInt *cols; 445 const PetscScalar *vals; 446 PetscTruth flg; 447 ierr = MatGetOwnershipRange(red->pmats_sub,&Istart,&Iend);CHKERRQ(ierr); 448 for (i=Istart; i<Iend; i++) { 449 ierr = MatGetRow(red->pmats[0],i,&ncols,&cols,&vals);CHKERRQ(ierr); 450 ierr = MatSetValues(red->pmats_sub,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 451 ierr = MatRestoreRow(red->pmats[0],i,&ncols,&cols,&vals);CHKERRQ(ierr); 452 } 453 ierr = MatAssemblyBegin(red->pmats_sub,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 454 ierr = MatAssemblyEnd(red->pmats_sub,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 455 456 ierr = MatEqual(red->pmats_sub,Aredundant,&flg);CHKERRQ(ierr); 457 if (!flg) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"pmats_sub !=Aredundant "); 458 ierr = MatDestroy(red->pmats_sub); 459 } 460 red->pmats_sub = Aredundant; 461 462 /* tell PC of the subcommunicator its operators */ 463 ierr = PCSetOperators(red->pc,red->pmats_sub,red->pmats_sub,str);CHKERRQ(ierr); 464 } else { 465 ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr); 466 } 467 ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 468 ierr = PCSetUp(red->pc);CHKERRQ(ierr); 469 PetscFunctionReturn(0); 470 } 471 472 473 #undef __FUNCT__ 474 #define __FUNCT__ "PCApply_Redundant" 475 static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y) 476 { 477 PC_Redundant *red = (PC_Redundant*)pc->data; 478 PetscErrorCode ierr; 479 PetscScalar *array; 480 481 PetscFunctionBegin; 482 /* scatter x to xdup */ 483 ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 484 ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 485 486 /* place xdup's local array into xsub */ 487 ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr); 488 ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr); 489 490 /* apply preconditioner on each processor */ 491 ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr); 492 ierr = VecResetArray(red->xsub);CHKERRQ(ierr); 493 ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr); 494 495 /* place ysub's local array into ydup */ 496 ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr); 497 ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr); 498 499 /* scatter ydup to y */ 500 ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 501 ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 502 ierr = VecResetArray(red->ydup);CHKERRQ(ierr); 503 ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr); 504 PetscFunctionReturn(0); 505 } 506 507 #undef __FUNCT__ 508 #define __FUNCT__ "PCDestroy_Redundant" 509 static PetscErrorCode PCDestroy_Redundant(PC pc) 510 { 511 PC_Redundant *red = (PC_Redundant*)pc->data; 512 PetscErrorCode ierr; 513 514 PetscFunctionBegin; 515 if (red->scatterin) {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);} 516 if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);} 517 if (red->ysub) {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);} 518 if (red->xsub) {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);} 519 if (red->xdup) {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);} 520 if (red->ydup) {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);} 521 if (red->pmats_sub) { 522 ierr = MatDestroy(red->pmats_sub);CHKERRQ(ierr); 523 } 524 525 if (red->pmats) { 526 ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr); 527 } 528 ierr = PCDestroy(red->pc);CHKERRQ(ierr); 529 ierr = PetscFree(red);CHKERRQ(ierr); 530 PetscFunctionReturn(0); 531 } 532 533 #undef __FUNCT__ 534 #define __FUNCT__ "PCSetFromOptions_Redundant" 535 static PetscErrorCode PCSetFromOptions_Redundant(PC pc) 536 { 537 PetscFunctionBegin; 538 PetscFunctionReturn(0); 539 } 540 541 EXTERN_C_BEGIN 542 #undef __FUNCT__ 543 #define __FUNCT__ "PCRedundantSetScatter_Redundant" 544 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out) 545 { 546 PC_Redundant *red = (PC_Redundant*)pc->data; 547 PetscErrorCode ierr; 548 549 PetscFunctionBegin; 550 red->scatterin = in; 551 red->scatterout = out; 552 ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr); 553 ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr); 554 PetscFunctionReturn(0); 555 } 556 EXTERN_C_END 557 558 #undef __FUNCT__ 559 #define __FUNCT__ "PCRedundantSetScatter" 560 /*@ 561 PCRedundantSetScatter - Sets the scatter used to copy values into the 562 redundant local solve and the scatter to move them back into the global 563 vector. 564 565 Collective on PC 566 567 Input Parameters: 568 + pc - the preconditioner context 569 . in - the scatter to move the values in 570 - out - the scatter to move them out 571 572 Level: advanced 573 574 .keywords: PC, redundant solve 575 @*/ 576 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out) 577 { 578 PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter); 579 580 PetscFunctionBegin; 581 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 582 PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2); 583 PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3); 584 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr); 585 if (f) { 586 ierr = (*f)(pc,in,out);CHKERRQ(ierr); 587 } 588 PetscFunctionReturn(0); 589 } 590 591 EXTERN_C_BEGIN 592 #undef __FUNCT__ 593 #define __FUNCT__ "PCRedundantGetPC_Redundant" 594 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc) 595 { 596 PC_Redundant *red = (PC_Redundant*)pc->data; 597 598 PetscFunctionBegin; 599 *innerpc = red->pc; 600 PetscFunctionReturn(0); 601 } 602 EXTERN_C_END 603 604 #undef __FUNCT__ 605 #define __FUNCT__ "PCRedundantGetPC" 606 /*@ 607 PCRedundantGetPC - Gets the sequential PC created by the redundant PC. 608 609 Not Collective 610 611 Input Parameter: 612 . pc - the preconditioner context 613 614 Output Parameter: 615 . innerpc - the sequential PC 616 617 Level: advanced 618 619 .keywords: PC, redundant solve 620 @*/ 621 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc) 622 { 623 PetscErrorCode ierr,(*f)(PC,PC*); 624 625 PetscFunctionBegin; 626 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 627 PetscValidPointer(innerpc,2); 628 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr); 629 if (f) { 630 ierr = (*f)(pc,innerpc);CHKERRQ(ierr); 631 } 632 PetscFunctionReturn(0); 633 } 634 635 EXTERN_C_BEGIN 636 #undef __FUNCT__ 637 #define __FUNCT__ "PCRedundantGetOperators_Redundant" 638 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat) 639 { 640 PC_Redundant *red = (PC_Redundant*)pc->data; 641 642 PetscFunctionBegin; 643 if (mat) *mat = red->pmats[0]; 644 if (pmat) *pmat = red->pmats[0]; 645 PetscFunctionReturn(0); 646 } 647 EXTERN_C_END 648 649 #undef __FUNCT__ 650 #define __FUNCT__ "PCRedundantGetOperators" 651 /*@ 652 PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix 653 654 Not Collective 655 656 Input Parameter: 657 . pc - the preconditioner context 658 659 Output Parameters: 660 + mat - the matrix 661 - pmat - the (possibly different) preconditioner matrix 662 663 Level: advanced 664 665 .keywords: PC, redundant solve 666 @*/ 667 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat) 668 { 669 PetscErrorCode ierr,(*f)(PC,Mat*,Mat*); 670 671 PetscFunctionBegin; 672 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 673 if (mat) PetscValidPointer(mat,2); 674 if (pmat) PetscValidPointer(pmat,3); 675 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr); 676 if (f) { 677 ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr); 678 } 679 PetscFunctionReturn(0); 680 } 681 682 /* -------------------------------------------------------------------------------------*/ 683 /*MC 684 PCREDUNDANT - Runs a preconditioner for the entire problem on each processor 685 686 687 Options for the redundant preconditioners can be set with -redundant_pc_xxx 688 689 Level: intermediate 690 691 692 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(), 693 PCRedundantGetPC(), PCRedundantGetOperators() 694 695 M*/ 696 697 EXTERN_C_BEGIN 698 #undef __FUNCT__ 699 #define __FUNCT__ "PCCreate_Redundant" 700 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc) 701 { 702 PetscErrorCode ierr; 703 PC_Redundant *red; 704 const char *prefix; 705 706 PetscInt nsubcomm,np_subcomm,nleftover,i,j,color; 707 PetscMPIInt rank,size,subrank,*subsize; 708 MPI_Comm subcomm; 709 PetscMPIInt duprank; 710 PetscMPIInt rank_dup,size_dup; 711 MPI_Comm dupcomm; 712 713 PetscFunctionBegin; 714 ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr); 715 ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr); 716 red->useparallelmat = PETSC_TRUE; 717 718 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 719 ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr); 720 nsubcomm = size; 721 ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr); 722 if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size); 723 724 /* get size of each subcommunicators */ 725 ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr); 726 np_subcomm = size/nsubcomm; 727 nleftover = size - nsubcomm*np_subcomm; 728 for (i=0; i<nsubcomm; i++){ 729 subsize[i] = np_subcomm; 730 if (i<nleftover) subsize[i]++; 731 } 732 733 /* find color for this proc */ 734 #ifdef INTER_COLOR 735 color = rank%nsubcomm; 736 subrank = rank/nsubcomm; 737 #endif 738 739 #ifdef CONTIGUOUS_COLOR 740 color = 0; subrank = 0; i = 0; j=0; 741 while (i < size){ 742 if (rank == i) break; /* my color is found */ 743 if (j >= subsize[color]-1){ /* next subcomm */ 744 j = -1; subrank = -1; color++; 745 } 746 i++; j++; subrank++; 747 } 748 #endif 749 750 ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr); 751 red->subcomm = subcomm; 752 red->color = color; 753 red->nsubcomm = nsubcomm; 754 755 j = 0; duprank = 0; 756 for (i=0; i<nsubcomm; i++){ 757 if (j == color){ 758 duprank += subrank; 759 break; 760 } 761 duprank += subsize[i]; j++; 762 } 763 /* 764 ierr = PetscSynchronizedPrintf(pc->comm, "[%d] color %d, subrank %d, duprank %d\n",rank,color,subrank,duprank); 765 ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr); 766 */ 767 768 ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr); 769 ierr = MPI_Comm_rank(dupcomm,&rank_dup);CHKERRQ(ierr); 770 ierr = MPI_Comm_size(dupcomm,&size_dup);CHKERRQ(ierr); 771 /* 772 ierr = PetscSynchronizedPrintf(pc->comm, "[%d] duprank %d\n",rank,duprank); 773 ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr); 774 */ 775 red->dupcomm = dupcomm; 776 ierr = PetscFree(subsize);CHKERRQ(ierr); 777 778 /* create the sequential PC that each processor has copy of */ 779 ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr); 780 ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr); 781 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 782 ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr); 783 ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr); 784 785 pc->ops->apply = PCApply_Redundant; 786 pc->ops->applytranspose = 0; 787 pc->ops->setup = PCSetUp_Redundant; 788 pc->ops->destroy = PCDestroy_Redundant; 789 pc->ops->setfromoptions = PCSetFromOptions_Redundant; 790 pc->ops->setuponblocks = 0; 791 pc->ops->view = PCView_Redundant; 792 pc->ops->applyrichardson = 0; 793 794 pc->data = (void*)red; 795 796 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant", 797 PCRedundantSetScatter_Redundant);CHKERRQ(ierr); 798 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant", 799 PCRedundantGetPC_Redundant);CHKERRQ(ierr); 800 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant", 801 PCRedundantGetOperators_Redundant);CHKERRQ(ierr); 802 803 PetscFunctionReturn(0); 804 } 805 EXTERN_C_END 806