1 #define PETSCKSP_DLL 2 3 /* 4 This file defines a "solve the problem redundantly on each subgroup of processor" preconditioner. 5 */ 6 #include "private/pcimpl.h" /*I "petscpc.h" I*/ 7 #include "petscksp.h" 8 9 typedef struct { 10 PC pc; /* actual preconditioner used on each processor */ 11 Vec xsub,ysub; /* vectors of a subcommunicator to hold parallel vectors of pc->comm */ 12 Vec xdup,ydup; /* parallel vector that congregates xsub or ysub facilitating vector scattering */ 13 Mat pmats; /* matrix and optional preconditioner matrix belong to a subcommunicator */ 14 VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */ 15 PetscTruth useparallelmat; 16 MPI_Comm subcomm; /* processors belong to a subcommunicator implement a PC in parallel */ 17 MPI_Comm dupcomm; /* processors belong to pc->comm with their rank remapped in the way 18 that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */ 19 PetscInt nsubcomm; /* num of subcommunicators, which equals the num of redundant matrix systems */ 20 PetscInt color; /* color of processors in a subcommunicator */ 21 } PC_Redundant; 22 23 #include "src/sys/viewer/impls/ascii/asciiimpl.h" /*I "petsc.h" I*/ 24 #include "petscfix.h" 25 #include <stdarg.h> 26 27 PetscErrorCode PETSC_DLLEXPORT PetscViewerRestoreSubcomm(PetscViewer viewer,PetscViewer *outviewer) 28 { 29 PetscErrorCode ierr; 30 PetscMPIInt size; 31 32 PetscFunctionBegin; 33 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_COOKIE,1); 34 35 ierr = MPI_Comm_size(viewer->comm,&size);CHKERRQ(ierr); 36 if (size == 1) { 37 ierr = PetscObjectDereference((PetscObject)viewer);CHKERRQ(ierr); 38 if (outviewer) *outviewer = 0; 39 } else if (viewer->ops->restoresingleton) { 40 ierr = (*viewer->ops->restoresingleton)(viewer,outviewer);CHKERRQ(ierr); 41 } 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PetscViewerDestroy_ASCIIh" 47 PetscErrorCode PetscViewerDestroy_ASCIIh(PetscViewer viewer) 48 { 49 PetscMPIInt rank; 50 PetscErrorCode ierr; 51 PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)viewer->data; 52 PetscViewerLink *vlink; 53 PetscTruth flg; 54 55 PetscFunctionBegin; 56 if (vascii->sviewer) { 57 SETERRQ(PETSC_ERR_ORDER,"ASCII PetscViewer destroyed before restoring singleton PetscViewer"); 58 } 59 ierr = MPI_Comm_rank(viewer->comm,&rank);CHKERRQ(ierr); 60 if (!rank && vascii->fd != stderr && vascii->fd != stdout) { 61 if (vascii->fd) fclose(vascii->fd); 62 if (vascii->storecompressed) { 63 char par[PETSC_MAX_PATH_LEN],buf[PETSC_MAX_PATH_LEN]; 64 FILE *fp; 65 ierr = PetscStrcpy(par,"gzip ");CHKERRQ(ierr); 66 ierr = PetscStrcat(par,vascii->filename);CHKERRQ(ierr); 67 #if defined(PETSC_HAVE_POPEN) 68 ierr = PetscPOpen(PETSC_COMM_SELF,PETSC_NULL,par,"r",&fp);CHKERRQ(ierr); 69 if (fgets(buf,1024,fp)) { 70 SETERRQ2(PETSC_ERR_LIB,"Error from compression command %s\n%s",par,buf); 71 } 72 ierr = PetscPClose(PETSC_COMM_SELF,fp);CHKERRQ(ierr); 73 #else 74 SETERRQ(PETSC_ERR_SUP_SYS,"Cannot run external programs on this machine"); 75 #endif 76 } 77 } 78 ierr = PetscStrfree(vascii->filename);CHKERRQ(ierr); 79 ierr = PetscFree(vascii);CHKERRQ(ierr); 80 81 /* remove the viewer from the list in the MPI Communicator */ 82 if (Petsc_Viewer_keyval == MPI_KEYVAL_INVALID) { 83 ierr = MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelViewer,&Petsc_Viewer_keyval,(void*)0);CHKERRQ(ierr); 84 } 85 86 ierr = MPI_Attr_get(viewer->comm,Petsc_Viewer_keyval,(void**)&vlink,(PetscMPIInt*)&flg);CHKERRQ(ierr); 87 if (flg) { 88 if (vlink && vlink->viewer == viewer) { 89 ierr = MPI_Attr_put(viewer->comm,Petsc_Viewer_keyval,vlink->next);CHKERRQ(ierr); 90 ierr = PetscFree(vlink);CHKERRQ(ierr); 91 } else { 92 while (vlink && vlink->next) { 93 if (vlink->next->viewer == viewer) { 94 PetscViewerLink *nv = vlink->next; 95 vlink->next = vlink->next->next; 96 ierr = PetscFree(nv);CHKERRQ(ierr); 97 } 98 vlink = vlink->next; 99 } 100 } 101 } 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "PetscViewerDestroy_ASCII_Subcomm" 107 PetscErrorCode PetscViewerDestroy_ASCII_Subcomm(PetscViewer viewer) 108 { 109 PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)viewer->data; 110 PetscErrorCode ierr; 111 PetscFunctionBegin; 112 ierr = PetscViewerRestoreSubcomm(vascii->bviewer,&viewer);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "PetscViewerFlush_ASCII_Subcomm_0" 118 PetscErrorCode PetscViewerFlush_ASCII_Subcomm_0(PetscViewer viewer) 119 { 120 PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)viewer->data; 121 122 PetscFunctionBegin; 123 fflush(vascii->fd); 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "PetscViewerGetSubcomm_ASCII" 129 PetscErrorCode PetscViewerGetSubcomm_ASCII(PetscViewer viewer,MPI_Comm subcomm,PetscViewer *outviewer) 130 { 131 PetscMPIInt rank; 132 PetscErrorCode ierr; 133 PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)viewer->data,*ovascii; 134 const char *name; 135 136 PetscFunctionBegin; 137 if (vascii->sviewer) { 138 SETERRQ(PETSC_ERR_ORDER,"Subcomm already obtained from PetscViewer and not restored"); 139 } 140 /* ierr = PetscViewerCreate(PETSC_COMM_SELF,outviewer);CHKERRQ(ierr); */ 141 ierr = PetscViewerCreate(subcomm,outviewer);CHKERRQ(ierr); 142 ierr = PetscViewerSetType(*outviewer,PETSC_VIEWER_ASCII);CHKERRQ(ierr); 143 ovascii = (PetscViewer_ASCII*)(*outviewer)->data; 144 ovascii->fd = vascii->fd; 145 ovascii->tab = vascii->tab; 146 147 vascii->sviewer = *outviewer; 148 149 (*outviewer)->format = viewer->format; 150 (*outviewer)->iformat = viewer->iformat; 151 152 ierr = PetscObjectGetName((PetscObject)viewer,&name);CHKERRQ(ierr); 153 ierr = PetscObjectSetName((PetscObject)(*outviewer),name);CHKERRQ(ierr); 154 155 ierr = MPI_Comm_rank(viewer->comm,&rank);CHKERRQ(ierr); 156 ((PetscViewer_ASCII*)((*outviewer)->data))->bviewer = viewer; 157 (*outviewer)->ops->destroy = PetscViewerDestroy_ASCII_Subcomm; 158 if (rank) { 159 (*outviewer)->ops->flush = 0; 160 } else { 161 (*outviewer)->ops->flush = PetscViewerFlush_ASCII_Subcomm_0; 162 } 163 PetscFunctionReturn(0); 164 } 165 166 #undef __FUNCT__ 167 #define __FUNCT__ "PetscViewerRestoreSubcomm_ASCII" 168 PetscErrorCode PetscViewerRestoreSubcomm_ASCII(PetscViewer viewer,MPI_Comm subcomm,PetscViewer *outviewer) 169 { 170 PetscErrorCode ierr; 171 PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)(*outviewer)->data; 172 PetscViewer_ASCII *ascii = (PetscViewer_ASCII *)viewer->data; 173 174 PetscFunctionBegin; 175 if (!ascii->sviewer) { 176 SETERRQ(PETSC_ERR_ORDER,"Subcomm never obtained from PetscViewer"); 177 } 178 if (ascii->sviewer != *outviewer) { 179 SETERRQ(PETSC_ERR_ARG_WRONG,"This PetscViewer did not generate singleton"); 180 } 181 182 ascii->sviewer = 0; 183 vascii->fd = stdout; 184 (*outviewer)->ops->destroy = PetscViewerDestroy_ASCIIh; 185 ierr = PetscViewerDestroy(*outviewer);CHKERRQ(ierr); 186 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 187 PetscFunctionReturn(0); 188 } 189 190 #undef __FUNCT__ 191 #define __FUNCT__ "PCView_Redundant" 192 static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer) 193 { 194 PC_Redundant *red = (PC_Redundant*)pc->data; 195 PetscErrorCode ierr; 196 PetscMPIInt rank; 197 PetscTruth iascii,isstring; 198 PetscViewer sviewer,subviewer; 199 PetscInt color = red->color; 200 201 PetscFunctionBegin; 202 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 203 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 204 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr); 205 if (iascii) { 206 ierr = PetscViewerASCIIPrintf(viewer," Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr); 207 ierr = PetscViewerGetSubcomm_ASCII(viewer,red->pc->comm,&subviewer);CHKERRQ(ierr); 208 /* ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); */ 209 if (!color) { 210 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 211 ierr = PCView(red->pc,subviewer);CHKERRQ(ierr); 212 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 213 } 214 /* ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); */ 215 ierr = PetscViewerRestoreSubcomm_ASCII(viewer,red->pc->comm,&subviewer);CHKERRQ(ierr); 216 } else if (isstring) { 217 ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr); 218 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 219 if (!rank) { 220 ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 221 } 222 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 223 } else { 224 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name); 225 } 226 PetscFunctionReturn(0); 227 } 228 229 #include "src/mat/matimpl.h" /*I "petscmat.h" I*/ 230 #include "private/vecimpl.h" 231 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 232 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 233 234 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 235 PetscInt nzlocal,nsends,nrecvs; 236 PetscInt *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j; 237 PetscScalar *sbuf_a,**rbuf_a; 238 PetscErrorCode (*MatDestroy)(Mat); 239 } Mat_Redundant; 240 241 #undef __FUNCT__ 242 #define __FUNCT__ "PetscObjectContainerDestroy_MatRedundant" 243 PetscErrorCode PetscObjectContainerDestroy_MatRedundant(void *ptr) 244 { 245 PetscErrorCode ierr; 246 Mat_Redundant *redund=(Mat_Redundant*)ptr; 247 PetscInt i; 248 249 PetscFunctionBegin; 250 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 251 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 252 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 253 for (i=0; i<redund->nrecvs; i++){ 254 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 255 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 256 } 257 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 258 ierr = PetscFree(redund);CHKERRQ(ierr); 259 PetscFunctionReturn(0); 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatDestroy_MatRedundant" 264 PetscErrorCode MatDestroy_MatRedundant(Mat A) 265 { 266 PetscErrorCode ierr; 267 PetscObjectContainer container; 268 Mat_Redundant *redund=PETSC_NULL; 269 270 PetscFunctionBegin; 271 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 272 if (container) { 273 ierr = PetscObjectContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 274 } else { 275 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 276 } 277 A->ops->destroy = redund->MatDestroy; 278 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 279 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 280 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 281 PetscFunctionReturn(0); 282 } 283 284 #undef __FUNCT__ 285 #define __FUNCT__ "MatGetRedundantMatrix" 286 PetscErrorCode MatGetRedundantMatrix_AIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 287 { 288 PetscMPIInt rank,size; 289 MPI_Comm comm=mat->comm; 290 PetscErrorCode ierr; 291 PetscInt nsends,nrecvs,i,prid=100,rownz_max; 292 PetscMPIInt *send_rank,*recv_rank; 293 PetscInt *rowrange=mat->rmap.range; 294 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 295 Mat A=aij->A,B=aij->B,C=*matredundant; 296 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 297 PetscScalar *sbuf_a; 298 PetscInt nzlocal=a->nz+b->nz; 299 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 300 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 301 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 302 PetscScalar *vals,*aworkA,*aworkB; 303 PetscMPIInt tag1,tag2,tag3,imdex; 304 MPI_Request *s_waits1,*s_waits2,*s_waits3,*r_waits1,*r_waits2,*r_waits3; 305 MPI_Status recv_status,*send_status; 306 PetscInt *sbuf_nz,*rbuf_nz,count; 307 PetscInt **rbuf_j; 308 PetscScalar **rbuf_a; 309 Mat_Redundant *redund=PETSC_NULL; 310 PetscObjectContainer container; 311 312 PetscFunctionBegin; 313 ierr = PetscOptionsGetInt(PETSC_NULL,"-prid",&prid,PETSC_NULL);CHKERRQ(ierr); 314 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 315 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 316 317 if (reuse == MAT_REUSE_MATRIX) { 318 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 319 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 320 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 321 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 322 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 323 if (container) { 324 ierr = PetscObjectContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 325 } else { 326 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 327 } 328 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 329 330 nsends = redund->nsends; 331 nrecvs = redund->nrecvs; 332 send_rank = redund->send_rank; recv_rank = send_rank + size; 333 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 334 sbuf_j = redund->sbuf_j; 335 sbuf_a = redund->sbuf_a; 336 rbuf_j = redund->rbuf_j; 337 rbuf_a = redund->rbuf_a; 338 } 339 340 if (reuse == MAT_INITIAL_MATRIX){ 341 PetscMPIInt subrank,subsize; 342 PetscInt nleftover,np_subcomm; 343 /* get the destination processors' id send_rank, nsends and nrecvs */ 344 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 345 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 346 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 347 recv_rank = send_rank + size; 348 np_subcomm = size/nsubcomm; 349 nleftover = size - nsubcomm*np_subcomm; 350 nsends = 0; nrecvs = 0; 351 for (i=0; i<size; i++){ /* i=rank*/ 352 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 353 send_rank[nsends] = i; nsends++; 354 recv_rank[nrecvs++] = i; 355 } 356 } 357 if (rank >= size - nleftover){/* this proc is a leftover processor */ 358 i = size-nleftover-1; 359 j = 0; 360 while (j < nsubcomm - nleftover){ 361 send_rank[nsends++] = i; 362 i--; j++; 363 } 364 } 365 366 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 367 for (i=0; i<nleftover; i++){ 368 recv_rank[nrecvs++] = size-nleftover+i; 369 } 370 } 371 372 /* allocate sbuf_j, sbuf_a */ 373 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 374 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 375 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 376 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 377 378 /* copy mat's local entries into the buffers */ 379 if (reuse == MAT_INITIAL_MATRIX){ 380 rownz_max = 0; 381 rptr = sbuf_j; 382 cols = sbuf_j + rend-rstart + 1; 383 vals = sbuf_a; 384 rptr[0] = 0; 385 for (i=0; i<rend-rstart; i++){ 386 row = i + rstart; 387 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 388 ncols = nzA + nzB; 389 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 390 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 391 /* load the column indices for this row into cols */ 392 lwrite = 0; 393 for (l=0; l<nzB; l++) { 394 if ((ctmp = bmap[cworkB[l]]) < cstart){ 395 vals[lwrite] = aworkB[l]; 396 cols[lwrite++] = ctmp; 397 } 398 } 399 for (l=0; l<nzA; l++){ 400 vals[lwrite] = aworkA[l]; 401 cols[lwrite++] = cstart + cworkA[l]; 402 } 403 for (l=0; l<nzB; l++) { 404 if ((ctmp = bmap[cworkB[l]]) >= cend){ 405 vals[lwrite] = aworkB[l]; 406 cols[lwrite++] = ctmp; 407 } 408 } 409 vals += ncols; 410 cols += ncols; 411 rptr[i+1] = rptr[i] + ncols; 412 if (rownz_max < ncols) rownz_max = ncols; 413 } 414 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 415 } else { /* only copy matrix values into sbuf_a */ 416 rptr = sbuf_j; 417 vals = sbuf_a; 418 rptr[0] = 0; 419 for (i=0; i<rend-rstart; i++){ 420 row = i + rstart; 421 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 422 ncols = nzA + nzB; 423 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 424 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 425 lwrite = 0; 426 for (l=0; l<nzB; l++) { 427 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 428 } 429 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 430 for (l=0; l<nzB; l++) { 431 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 432 } 433 vals += ncols; 434 rptr[i+1] = rptr[i] + ncols; 435 } 436 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 437 438 /* send nzlocal to others, and recv other's nzlocal */ 439 /*--------------------------------------------------*/ 440 if (reuse == MAT_INITIAL_MATRIX){ 441 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 442 s_waits2 = s_waits3 + nsends; 443 s_waits1 = s_waits2 + nsends; 444 r_waits1 = s_waits1 + nsends; 445 r_waits2 = r_waits1 + nrecvs; 446 r_waits3 = r_waits2 + nrecvs; 447 } else { 448 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 449 r_waits3 = s_waits3 + nsends; 450 } 451 452 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 453 if (reuse == MAT_INITIAL_MATRIX){ 454 /* get new tags to keep the communication clean */ 455 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 456 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 457 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 458 rbuf_nz = sbuf_nz + nsends; 459 460 /* post receives of other's nzlocal */ 461 for (i=0; i<nrecvs; i++){ 462 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 463 } 464 /* send nzlocal to others */ 465 for (i=0; i<nsends; i++){ 466 sbuf_nz[i] = nzlocal; 467 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 468 } 469 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 470 count = nrecvs; 471 while (count) { 472 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 473 recv_rank[imdex] = recv_status.MPI_SOURCE; 474 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 475 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 476 477 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 478 rbuf_nz[imdex] += i + 2; 479 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 480 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 481 count--; 482 } 483 /* wait on sends of nzlocal */ 484 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 485 /* send mat->i,j to others, and recv from other's */ 486 /*------------------------------------------------*/ 487 for (i=0; i<nsends; i++){ 488 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 489 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 490 } 491 /* wait on receives of mat->i,j */ 492 /*------------------------------*/ 493 count = nrecvs; 494 while (count) { 495 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 496 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 497 count--; 498 } 499 /* wait on sends of mat->i,j */ 500 /*---------------------------*/ 501 if (nsends) { 502 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 503 } 504 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 505 506 /* post receives, send and receive mat->a */ 507 /*----------------------------------------*/ 508 for (imdex=0; imdex<nrecvs; imdex++) { 509 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 510 } 511 for (i=0; i<nsends; i++){ 512 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 513 } 514 count = nrecvs; 515 while (count) { 516 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 517 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 518 count--; 519 } 520 if (nsends) { 521 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 522 } 523 524 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 525 526 /* create redundant matrix */ 527 /*-------------------------*/ 528 if (reuse == MAT_INITIAL_MATRIX){ 529 /* compute rownz_max for preallocation */ 530 for (imdex=0; imdex<nrecvs; imdex++){ 531 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 532 rptr = rbuf_j[imdex]; 533 for (i=0; i<j; i++){ 534 ncols = rptr[i+1] - rptr[i]; 535 if (rownz_max < ncols) rownz_max = ncols; 536 } 537 } 538 539 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 540 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 541 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 542 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 543 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 544 } else { 545 C = *matredundant; 546 } 547 548 /* insert local matrix entries */ 549 rptr = sbuf_j; 550 cols = sbuf_j + rend-rstart + 1; 551 vals = sbuf_a; 552 for (i=0; i<rend-rstart; i++){ 553 row = i + rstart; 554 ncols = rptr[i+1] - rptr[i]; 555 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 556 vals += ncols; 557 cols += ncols; 558 } 559 /* insert received matrix entries */ 560 for (imdex=0; imdex<nrecvs; imdex++){ 561 rstart = rowrange[recv_rank[imdex]]; 562 rend = rowrange[recv_rank[imdex]+1]; 563 rptr = rbuf_j[imdex]; 564 cols = rbuf_j[imdex] + rend-rstart + 1; 565 vals = rbuf_a[imdex]; 566 for (i=0; i<rend-rstart; i++){ 567 row = i + rstart; 568 ncols = rptr[i+1] - rptr[i]; 569 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 570 vals += ncols; 571 cols += ncols; 572 } 573 } 574 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 575 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 576 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 577 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 578 if (reuse == MAT_INITIAL_MATRIX){ 579 PetscObjectContainer container; 580 *matredundant = C; 581 /* create a supporting struct and attach it to C for reuse */ 582 ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr); 583 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 584 ierr = PetscObjectContainerSetPointer(container,redund);CHKERRQ(ierr); 585 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 586 ierr = PetscObjectContainerSetUserDestroy(container,PetscObjectContainerDestroy_MatRedundant);CHKERRQ(ierr); 587 588 redund->nzlocal = nzlocal; 589 redund->nsends = nsends; 590 redund->nrecvs = nrecvs; 591 redund->send_rank = send_rank; 592 redund->sbuf_nz = sbuf_nz; 593 redund->sbuf_j = sbuf_j; 594 redund->sbuf_a = sbuf_a; 595 redund->rbuf_j = rbuf_j; 596 redund->rbuf_a = rbuf_a; 597 598 redund->MatDestroy = C->ops->destroy; 599 C->ops->destroy = MatDestroy_MatRedundant; 600 } 601 PetscFunctionReturn(0); 602 } 603 604 #undef __FUNCT__ 605 #define __FUNCT__ "PCSetUp_Redundant" 606 static PetscErrorCode PCSetUp_Redundant(PC pc) 607 { 608 PC_Redundant *red = (PC_Redundant*)pc->data; 609 PetscErrorCode ierr; 610 PetscInt mstart,mend,mlocal,m; 611 PetscMPIInt size; 612 MatReuse reuse = MAT_INITIAL_MATRIX; 613 MatStructure str = DIFFERENT_NONZERO_PATTERN; 614 MPI_Comm comm; 615 Vec vec; 616 617 PetscInt mlocal_sub; 618 PetscMPIInt subsize,subrank; 619 PetscInt rstart_sub,rend_sub,mloc_sub; 620 621 PetscFunctionBegin; 622 ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr); 623 ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr); 624 ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 625 ierr = VecGetSize(vec,&m);CHKERRQ(ierr); 626 if (!pc->setupcalled) { 627 /* create working vectors xsub/ysub and xdup/ydup */ 628 ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr); 629 ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr); 630 631 /* get local size of xsub/ysub */ 632 ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr); 633 ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr); 634 rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */ 635 if (subrank+1 < subsize){ 636 rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)]; 637 } else { 638 rend_sub = m; 639 } 640 mloc_sub = rend_sub - rstart_sub; 641 ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr); 642 /* create xsub with empty local arrays, because xdup's arrays will be placed into it */ 643 ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr); 644 645 /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it. 646 Note: we use communicator dupcomm, not pc->comm! */ 647 ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr); 648 ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr); 649 650 /* create vec scatters */ 651 if (!red->scatterin){ 652 IS is1,is2; 653 PetscInt *idx1,*idx2,i,j,k; 654 ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr); 655 idx2 = idx1 + red->nsubcomm*mlocal; 656 j = 0; 657 for (k=0; k<red->nsubcomm; k++){ 658 for (i=mstart; i<mend; i++){ 659 idx1[j] = i; 660 idx2[j++] = i + m*k; 661 } 662 } 663 ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr); 664 ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr); 665 ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr); 666 ierr = ISDestroy(is1);CHKERRQ(ierr); 667 ierr = ISDestroy(is2);CHKERRQ(ierr); 668 669 ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr); 670 ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr); 671 ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr); 672 ierr = ISDestroy(is1);CHKERRQ(ierr); 673 ierr = ISDestroy(is2);CHKERRQ(ierr); 674 ierr = PetscFree(idx1);CHKERRQ(ierr); 675 } 676 } 677 ierr = VecDestroy(vec);CHKERRQ(ierr); 678 679 /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */ 680 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 681 if (size == 1) { 682 red->useparallelmat = PETSC_FALSE; 683 } 684 685 if (red->useparallelmat) { 686 if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) { 687 /* destroy old matrices */ 688 if (red->pmats) { 689 ierr = MatDestroy(red->pmats);CHKERRQ(ierr); 690 } 691 } else if (pc->setupcalled == 1) { 692 reuse = MAT_REUSE_MATRIX; 693 str = SAME_NONZERO_PATTERN; 694 } 695 696 /* grab the parallel matrix and put it into processors of a subcomminicator */ 697 /*--------------------------------------------------------------------------*/ 698 ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr); 699 ierr = MatGetRedundantMatrix_AIJ(pc->pmat,red->nsubcomm,red->subcomm,mlocal_sub,reuse,&red->pmats);CHKERRQ(ierr); 700 /* tell PC of the subcommunicator its operators */ 701 ierr = PCSetOperators(red->pc,red->pmats,red->pmats,str);CHKERRQ(ierr); 702 } else { 703 ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr); 704 } 705 ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 706 ierr = PCSetUp(red->pc);CHKERRQ(ierr); 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNCT__ 711 #define __FUNCT__ "PCApply_Redundant" 712 static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y) 713 { 714 PC_Redundant *red = (PC_Redundant*)pc->data; 715 PetscErrorCode ierr; 716 PetscScalar *array; 717 718 PetscFunctionBegin; 719 /* scatter x to xdup */ 720 ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 721 ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 722 723 /* place xdup's local array into xsub */ 724 ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr); 725 ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr); 726 727 /* apply preconditioner on each processor */ 728 ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr); 729 ierr = VecResetArray(red->xsub);CHKERRQ(ierr); 730 ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr); 731 732 /* place ysub's local array into ydup */ 733 ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr); 734 ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr); 735 736 /* scatter ydup to y */ 737 ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 738 ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 739 ierr = VecResetArray(red->ydup);CHKERRQ(ierr); 740 ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "PCDestroy_Redundant" 746 static PetscErrorCode PCDestroy_Redundant(PC pc) 747 { 748 PC_Redundant *red = (PC_Redundant*)pc->data; 749 PetscErrorCode ierr; 750 751 PetscFunctionBegin; 752 if (red->scatterin) {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);} 753 if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);} 754 if (red->ysub) {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);} 755 if (red->xsub) {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);} 756 if (red->xdup) {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);} 757 if (red->ydup) {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);} 758 if (red->pmats) { 759 ierr = MatDestroy(red->pmats);CHKERRQ(ierr); 760 } 761 762 763 ierr = PCDestroy(red->pc);CHKERRQ(ierr); 764 ierr = PetscFree(red);CHKERRQ(ierr); 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "PCSetFromOptions_Redundant" 770 static PetscErrorCode PCSetFromOptions_Redundant(PC pc) 771 { 772 PetscFunctionBegin; 773 PetscFunctionReturn(0); 774 } 775 776 EXTERN_C_BEGIN 777 #undef __FUNCT__ 778 #define __FUNCT__ "PCRedundantSetScatter_Redundant" 779 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out) 780 { 781 PC_Redundant *red = (PC_Redundant*)pc->data; 782 PetscErrorCode ierr; 783 784 PetscFunctionBegin; 785 red->scatterin = in; 786 red->scatterout = out; 787 ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr); 788 ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr); 789 PetscFunctionReturn(0); 790 } 791 EXTERN_C_END 792 793 #undef __FUNCT__ 794 #define __FUNCT__ "PCRedundantSetScatter" 795 /*@ 796 PCRedundantSetScatter - Sets the scatter used to copy values into the 797 redundant local solve and the scatter to move them back into the global 798 vector. 799 800 Collective on PC 801 802 Input Parameters: 803 + pc - the preconditioner context 804 . in - the scatter to move the values in 805 - out - the scatter to move them out 806 807 Level: advanced 808 809 .keywords: PC, redundant solve 810 @*/ 811 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out) 812 { 813 PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter); 814 815 PetscFunctionBegin; 816 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 817 PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2); 818 PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3); 819 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr); 820 if (f) { 821 ierr = (*f)(pc,in,out);CHKERRQ(ierr); 822 } 823 PetscFunctionReturn(0); 824 } 825 826 EXTERN_C_BEGIN 827 #undef __FUNCT__ 828 #define __FUNCT__ "PCRedundantGetPC_Redundant" 829 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc) 830 { 831 PC_Redundant *red = (PC_Redundant*)pc->data; 832 833 PetscFunctionBegin; 834 *innerpc = red->pc; 835 PetscFunctionReturn(0); 836 } 837 EXTERN_C_END 838 839 #undef __FUNCT__ 840 #define __FUNCT__ "PCRedundantGetPC" 841 /*@ 842 PCRedundantGetPC - Gets the sequential PC created by the redundant PC. 843 844 Not Collective 845 846 Input Parameter: 847 . pc - the preconditioner context 848 849 Output Parameter: 850 . innerpc - the sequential PC 851 852 Level: advanced 853 854 .keywords: PC, redundant solve 855 @*/ 856 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc) 857 { 858 PetscErrorCode ierr,(*f)(PC,PC*); 859 860 PetscFunctionBegin; 861 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 862 PetscValidPointer(innerpc,2); 863 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr); 864 if (f) { 865 ierr = (*f)(pc,innerpc);CHKERRQ(ierr); 866 } 867 PetscFunctionReturn(0); 868 } 869 870 EXTERN_C_BEGIN 871 #undef __FUNCT__ 872 #define __FUNCT__ "PCRedundantGetOperators_Redundant" 873 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat) 874 { 875 PC_Redundant *red = (PC_Redundant*)pc->data; 876 877 PetscFunctionBegin; 878 if (mat) *mat = red->pmats; 879 if (pmat) *pmat = red->pmats; 880 PetscFunctionReturn(0); 881 } 882 EXTERN_C_END 883 884 #undef __FUNCT__ 885 #define __FUNCT__ "PCRedundantGetOperators" 886 /*@ 887 PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix 888 889 Not Collective 890 891 Input Parameter: 892 . pc - the preconditioner context 893 894 Output Parameters: 895 + mat - the matrix 896 - pmat - the (possibly different) preconditioner matrix 897 898 Level: advanced 899 900 .keywords: PC, redundant solve 901 @*/ 902 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat) 903 { 904 PetscErrorCode ierr,(*f)(PC,Mat*,Mat*); 905 906 PetscFunctionBegin; 907 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 908 if (mat) PetscValidPointer(mat,2); 909 if (pmat) PetscValidPointer(pmat,3); 910 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr); 911 if (f) { 912 ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr); 913 } 914 PetscFunctionReturn(0); 915 } 916 917 /* -------------------------------------------------------------------------------------*/ 918 /*MC 919 PCREDUNDANT - Runs a preconditioner for the entire problem on each processor 920 921 922 Options for the redundant preconditioners can be set with -redundant_pc_xxx 923 924 Level: intermediate 925 926 927 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(), 928 PCRedundantGetPC(), PCRedundantGetOperators() 929 930 M*/ 931 932 EXTERN_C_BEGIN 933 #undef __FUNCT__ 934 #define __FUNCT__ "PCCreate_Redundant" 935 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc) 936 { 937 PetscErrorCode ierr; 938 PC_Redundant *red; 939 const char *prefix; 940 PetscInt nsubcomm,np_subcomm,nleftover,i,j,color; 941 PetscMPIInt rank,size,subrank,*subsize,duprank; 942 MPI_Comm subcomm=0,dupcomm=0; 943 944 PetscFunctionBegin; 945 ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr); 946 ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr); 947 red->useparallelmat = PETSC_TRUE; 948 949 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 950 ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr); 951 nsubcomm = size; 952 ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr); 953 if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size); 954 955 /*-------------------------------------------------------------------------------------------------- 956 To avoid data scattering from subcomm back to original comm, we create subcomm by iteratively taking a 957 processe into a subcomm. 958 An example: size=4, nsubcomm=3 959 pc->comm: 960 rank: [0] [1] [2] [3] 961 color: 0 1 2 0 962 963 subcomm: 964 subrank: [0] [0] [0] [1] 965 966 dupcomm: 967 duprank: [0] [2] [3] [1] 968 969 Here, subcomm[color = 0] has subsize=2, owns process [0] and [3] 970 subcomm[color = 1] has subsize=1, owns process [1] 971 subcomm[color = 2] has subsize=1, owns process [2] 972 dupcomm has same number of processes as pc->comm, and its duprank maps 973 processes in subcomm contiguously into a 1d array: 974 duprank: [0] [1] [2] [3] 975 rank: [0] [3] [1] [2] 976 subcomm[0] subcomm[1] subcomm[2] 977 ----------------------------------------------------------------------------------------*/ 978 979 /* get size of each subcommunicators */ 980 ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr); 981 np_subcomm = size/nsubcomm; 982 nleftover = size - nsubcomm*np_subcomm; 983 for (i=0; i<nsubcomm; i++){ 984 subsize[i] = np_subcomm; 985 if (i<nleftover) subsize[i]++; 986 } 987 988 /* find color for this proc */ 989 color = rank%nsubcomm; 990 subrank = rank/nsubcomm; 991 992 ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr); 993 red->subcomm = subcomm; 994 red->color = color; 995 red->nsubcomm = nsubcomm; 996 997 j = 0; duprank = 0; 998 for (i=0; i<nsubcomm; i++){ 999 if (j == color){ 1000 duprank += subrank; 1001 break; 1002 } 1003 duprank += subsize[i]; j++; 1004 } 1005 1006 /* create dupcomm with same size as comm, but its rank, duprank, maps subcomm's contiguously into dupcomm */ 1007 ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr); 1008 red->dupcomm = dupcomm; 1009 ierr = PetscFree(subsize);CHKERRQ(ierr); 1010 /* if (rank == 0) printf("[%d] subrank %d, duprank: %d\n",rank,subrank,duprank); */ 1011 1012 /* create the sequential PC that each processor has copy of */ 1013 ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr); 1014 ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr); 1015 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 1016 ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr); 1017 ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr); 1018 1019 pc->ops->apply = PCApply_Redundant; 1020 pc->ops->applytranspose = 0; 1021 pc->ops->setup = PCSetUp_Redundant; 1022 pc->ops->destroy = PCDestroy_Redundant; 1023 pc->ops->setfromoptions = PCSetFromOptions_Redundant; 1024 pc->ops->setuponblocks = 0; 1025 pc->ops->view = PCView_Redundant; 1026 pc->ops->applyrichardson = 0; 1027 1028 pc->data = (void*)red; 1029 1030 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant", 1031 PCRedundantSetScatter_Redundant);CHKERRQ(ierr); 1032 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant", 1033 PCRedundantGetPC_Redundant);CHKERRQ(ierr); 1034 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant", 1035 PCRedundantGetOperators_Redundant);CHKERRQ(ierr); 1036 1037 PetscFunctionReturn(0); 1038 } 1039 EXTERN_C_END 1040