1 #define PETSCKSP_DLL 2 3 /* 4 This file defines a "solve the problem redundantly on each subgroup of processor" preconditioner. 5 */ 6 #include "private/pcimpl.h" /*I "petscpc.h" I*/ 7 #include "petscksp.h" 8 9 typedef struct { 10 PC pc; /* actual preconditioner used on each processor */ 11 Vec xsub,ysub; /* vectors of a subcommunicator to hold parallel vectors of pc->comm */ 12 Vec xdup,ydup; /* parallel vector that congregates xsub or ysub facilitating vector scattering */ 13 Mat pmats; /* matrix and optional preconditioner matrix belong to a subcommunicator */ 14 VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */ 15 PetscTruth useparallelmat; 16 MPI_Comm subcomm; /* processors belong to a subcommunicator implement a PC in parallel */ 17 MPI_Comm dupcomm; /* processors belong to pc->comm with their rank remapped in the way 18 that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */ 19 PetscInt nsubcomm; /* num of subcommunicators, which equals the num of redundant matrix systems */ 20 PetscInt color; /* color of processors in a subcommunicator */ 21 } PC_Redundant; 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "PCView_Redundant" 25 static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer) 26 { 27 PC_Redundant *red = (PC_Redundant*)pc->data; 28 PetscErrorCode ierr; 29 PetscMPIInt rank; 30 PetscTruth iascii,isstring; 31 PetscViewer sviewer; 32 33 PetscFunctionBegin; 34 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 35 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 36 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr); 37 if (iascii) { 38 ierr = PetscViewerASCIIPrintf(viewer," Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr); 39 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 40 if (!rank) { 41 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 42 ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 43 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 44 } 45 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 46 } else if (isstring) { 47 ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr); 48 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 49 if (!rank) { 50 ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 51 } 52 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 53 } else { 54 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name); 55 } 56 PetscFunctionReturn(0); 57 } 58 59 #include "src/mat/matimpl.h" /*I "petscmat.h" I*/ 60 #include "private/vecimpl.h" 61 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 62 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 63 64 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 65 PetscInt nzlocal,nsends,nrecvs; 66 PetscInt *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j; 67 PetscScalar *sbuf_a,**rbuf_a; 68 PetscErrorCode (*MatDestroy)(Mat); 69 } Mat_Redundant; 70 71 #undef __FUNCT__ 72 #define __FUNCT__ "PetscObjectContainerDestroy_MatRedundant" 73 PetscErrorCode PetscObjectContainerDestroy_MatRedundant(void *ptr) 74 { 75 PetscErrorCode ierr; 76 Mat_Redundant *redund=(Mat_Redundant*)ptr; 77 PetscInt i; 78 79 PetscFunctionBegin; 80 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 81 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 82 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 83 for (i=0; i<redund->nrecvs; i++){ 84 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 85 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 86 } 87 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 88 ierr = PetscFree(redund);CHKERRQ(ierr); 89 PetscFunctionReturn(0); 90 } 91 92 #undef __FUNCT__ 93 #define __FUNCT__ "MatDestroy_MatRedundant" 94 PetscErrorCode MatDestroy_MatRedundant(Mat A) 95 { 96 PetscErrorCode ierr; 97 PetscObjectContainer container; 98 Mat_Redundant *redund=PETSC_NULL; 99 100 PetscFunctionBegin; 101 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 102 if (container) { 103 ierr = PetscObjectContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 104 } else { 105 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 106 } 107 A->ops->destroy = redund->MatDestroy; 108 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 109 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 110 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 111 PetscFunctionReturn(0); 112 } 113 114 #undef __FUNCT__ 115 #define __FUNCT__ "MatGetRedundantMatrix" 116 PetscErrorCode MatGetRedundantMatrix_AIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 117 { 118 PetscMPIInt rank,size; 119 MPI_Comm comm=mat->comm; 120 PetscErrorCode ierr; 121 PetscInt nsends,nrecvs,i,prid=100,rownz_max; 122 PetscMPIInt *send_rank,*recv_rank; 123 PetscInt *rowrange=mat->rmap.range; 124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 125 Mat A=aij->A,B=aij->B,C=*matredundant; 126 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 127 PetscScalar *sbuf_a; 128 PetscInt nzlocal=a->nz+b->nz; 129 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 130 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 131 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 132 PetscScalar *vals,*aworkA,*aworkB; 133 PetscMPIInt tag1,tag2,tag3,imdex; 134 MPI_Request *s_waits1,*s_waits2,*s_waits3,*r_waits1,*r_waits2,*r_waits3; 135 MPI_Status recv_status,*send_status; 136 PetscInt *sbuf_nz,*rbuf_nz,count; 137 PetscInt **rbuf_j; 138 PetscScalar **rbuf_a; 139 Mat_Redundant *redund=PETSC_NULL; 140 PetscObjectContainer container; 141 142 PetscFunctionBegin; 143 ierr = PetscOptionsGetInt(PETSC_NULL,"-prid",&prid,PETSC_NULL);CHKERRQ(ierr); 144 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 145 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 146 147 if (reuse == MAT_REUSE_MATRIX) { 148 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 149 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 150 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 151 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 152 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 153 if (container) { 154 ierr = PetscObjectContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 155 } else { 156 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 157 } 158 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 159 160 nsends = redund->nsends; 161 nrecvs = redund->nrecvs; 162 send_rank = redund->send_rank; recv_rank = send_rank + size; 163 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 164 sbuf_j = redund->sbuf_j; 165 sbuf_a = redund->sbuf_a; 166 rbuf_j = redund->rbuf_j; 167 rbuf_a = redund->rbuf_a; 168 } 169 170 if (reuse == MAT_INITIAL_MATRIX){ 171 PetscMPIInt subrank,subsize; 172 PetscInt nleftover,np_subcomm; 173 /* get the destination processors' id send_rank, nsends and nrecvs */ 174 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 175 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 176 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 177 recv_rank = send_rank + size; 178 np_subcomm = size/nsubcomm; 179 nleftover = size - nsubcomm*np_subcomm; 180 nsends = 0; nrecvs = 0; 181 for (i=0; i<size; i++){ /* i=rank*/ 182 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 183 send_rank[nsends] = i; nsends++; 184 recv_rank[nrecvs++] = i; 185 } 186 } 187 if (rank >= size - nleftover){/* this proc is a leftover processor */ 188 i = size-nleftover-1; 189 j = 0; 190 while (j < nsubcomm - nleftover){ 191 send_rank[nsends++] = i; 192 i--; j++; 193 } 194 } 195 196 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 197 for (i=0; i<nleftover; i++){ 198 recv_rank[nrecvs++] = size-nleftover+i; 199 } 200 } 201 202 /* allocate sbuf_j, sbuf_a */ 203 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 204 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 205 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 206 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 207 208 /* copy mat's local entries into the buffers */ 209 if (reuse == MAT_INITIAL_MATRIX){ 210 rownz_max = 0; 211 rptr = sbuf_j; 212 cols = sbuf_j + rend-rstart + 1; 213 vals = sbuf_a; 214 rptr[0] = 0; 215 for (i=0; i<rend-rstart; i++){ 216 row = i + rstart; 217 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 218 ncols = nzA + nzB; 219 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 220 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 221 /* load the column indices for this row into cols */ 222 lwrite = 0; 223 for (l=0; l<nzB; l++) { 224 if ((ctmp = bmap[cworkB[l]]) < cstart){ 225 vals[lwrite] = aworkB[l]; 226 cols[lwrite++] = ctmp; 227 } 228 } 229 for (l=0; l<nzA; l++){ 230 vals[lwrite] = aworkA[l]; 231 cols[lwrite++] = cstart + cworkA[l]; 232 } 233 for (l=0; l<nzB; l++) { 234 if ((ctmp = bmap[cworkB[l]]) >= cend){ 235 vals[lwrite] = aworkB[l]; 236 cols[lwrite++] = ctmp; 237 } 238 } 239 vals += ncols; 240 cols += ncols; 241 rptr[i+1] = rptr[i] + ncols; 242 if (rownz_max < ncols) rownz_max = ncols; 243 } 244 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 245 } else { /* only copy matrix values into sbuf_a */ 246 rptr = sbuf_j; 247 vals = sbuf_a; 248 rptr[0] = 0; 249 for (i=0; i<rend-rstart; i++){ 250 row = i + rstart; 251 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 252 ncols = nzA + nzB; 253 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 254 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 255 lwrite = 0; 256 for (l=0; l<nzB; l++) { 257 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 258 } 259 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 260 for (l=0; l<nzB; l++) { 261 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 262 } 263 vals += ncols; 264 rptr[i+1] = rptr[i] + ncols; 265 } 266 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 267 268 /* send nzlocal to others, and recv other's nzlocal */ 269 /*--------------------------------------------------*/ 270 if (reuse == MAT_INITIAL_MATRIX){ 271 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 272 s_waits2 = s_waits3 + nsends; 273 s_waits1 = s_waits2 + nsends; 274 r_waits1 = s_waits1 + nsends; 275 r_waits2 = r_waits1 + nrecvs; 276 r_waits3 = r_waits2 + nrecvs; 277 } else { 278 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 279 r_waits3 = s_waits3 + nsends; 280 } 281 282 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 283 if (reuse == MAT_INITIAL_MATRIX){ 284 /* get new tags to keep the communication clean */ 285 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 286 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 287 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 288 rbuf_nz = sbuf_nz + nsends; 289 290 /* post receives of other's nzlocal */ 291 for (i=0; i<nrecvs; i++){ 292 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 293 } 294 /* send nzlocal to others */ 295 for (i=0; i<nsends; i++){ 296 sbuf_nz[i] = nzlocal; 297 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 298 } 299 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 300 count = nrecvs; 301 while (count) { 302 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 303 recv_rank[imdex] = recv_status.MPI_SOURCE; 304 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 305 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 306 307 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 308 rbuf_nz[imdex] += i + 2; 309 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 310 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 311 count--; 312 } 313 /* wait on sends of nzlocal */ 314 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 315 /* send mat->i,j to others, and recv from other's */ 316 /*------------------------------------------------*/ 317 for (i=0; i<nsends; i++){ 318 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 319 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 320 } 321 /* wait on receives of mat->i,j */ 322 /*------------------------------*/ 323 count = nrecvs; 324 while (count) { 325 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 326 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 327 count--; 328 } 329 /* wait on sends of mat->i,j */ 330 /*---------------------------*/ 331 if (nsends) { 332 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 333 } 334 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 335 336 /* post receives, send and receive mat->a */ 337 /*----------------------------------------*/ 338 for (imdex=0; imdex<nrecvs; imdex++) { 339 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 340 } 341 for (i=0; i<nsends; i++){ 342 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 343 } 344 count = nrecvs; 345 while (count) { 346 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 347 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 348 count--; 349 } 350 if (nsends) { 351 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 352 } 353 354 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 355 356 /* create redundant matrix */ 357 /*-------------------------*/ 358 if (reuse == MAT_INITIAL_MATRIX){ 359 /* compute rownz_max for preallocation */ 360 for (imdex=0; imdex<nrecvs; imdex++){ 361 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 362 rptr = rbuf_j[imdex]; 363 for (i=0; i<j; i++){ 364 ncols = rptr[i+1] - rptr[i]; 365 if (rownz_max < ncols) rownz_max = ncols; 366 } 367 } 368 369 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 370 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 371 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 372 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 373 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 374 } else { 375 C = *matredundant; 376 } 377 378 /* insert local matrix entries */ 379 rptr = sbuf_j; 380 cols = sbuf_j + rend-rstart + 1; 381 vals = sbuf_a; 382 for (i=0; i<rend-rstart; i++){ 383 row = i + rstart; 384 ncols = rptr[i+1] - rptr[i]; 385 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 386 vals += ncols; 387 cols += ncols; 388 } 389 /* insert received matrix entries */ 390 for (imdex=0; imdex<nrecvs; imdex++){ 391 rstart = rowrange[recv_rank[imdex]]; 392 rend = rowrange[recv_rank[imdex]+1]; 393 rptr = rbuf_j[imdex]; 394 cols = rbuf_j[imdex] + rend-rstart + 1; 395 vals = rbuf_a[imdex]; 396 for (i=0; i<rend-rstart; i++){ 397 row = i + rstart; 398 ncols = rptr[i+1] - rptr[i]; 399 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 400 vals += ncols; 401 cols += ncols; 402 } 403 } 404 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 405 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 406 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 407 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 408 if (reuse == MAT_INITIAL_MATRIX){ 409 PetscObjectContainer container; 410 *matredundant = C; 411 /* create a supporting struct and attach it to C for reuse */ 412 ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr); 413 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 414 ierr = PetscObjectContainerSetPointer(container,redund);CHKERRQ(ierr); 415 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 416 ierr = PetscObjectContainerSetUserDestroy(container,PetscObjectContainerDestroy_MatRedundant);CHKERRQ(ierr); 417 418 redund->nzlocal = nzlocal; 419 redund->nsends = nsends; 420 redund->nrecvs = nrecvs; 421 redund->send_rank = send_rank; 422 redund->sbuf_nz = sbuf_nz; 423 redund->sbuf_j = sbuf_j; 424 redund->sbuf_a = sbuf_a; 425 redund->rbuf_j = rbuf_j; 426 redund->rbuf_a = rbuf_a; 427 428 redund->MatDestroy = C->ops->destroy; 429 C->ops->destroy = MatDestroy_MatRedundant; 430 } 431 PetscFunctionReturn(0); 432 } 433 434 #undef __FUNCT__ 435 #define __FUNCT__ "PCSetUp_Redundant" 436 static PetscErrorCode PCSetUp_Redundant(PC pc) 437 { 438 PC_Redundant *red = (PC_Redundant*)pc->data; 439 PetscErrorCode ierr; 440 PetscInt mstart,mend,mlocal,m; 441 PetscMPIInt size; 442 MatReuse reuse = MAT_INITIAL_MATRIX; 443 MatStructure str = DIFFERENT_NONZERO_PATTERN; 444 MPI_Comm comm; 445 Vec vec; 446 447 PetscInt mlocal_sub; 448 PetscMPIInt subsize,subrank; 449 PetscInt rstart_sub,rend_sub,mloc_sub; 450 451 PetscFunctionBegin; 452 ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr); 453 ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr); 454 ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 455 ierr = VecGetSize(vec,&m);CHKERRQ(ierr); 456 if (!pc->setupcalled) { 457 /* create working vectors xsub/ysub and xdup/ydup */ 458 ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr); 459 ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr); 460 461 /* get local size of xsub/ysub */ 462 ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr); 463 ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr); 464 rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */ 465 if (subrank+1 < subsize){ 466 rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)]; 467 } else { 468 rend_sub = m; 469 } 470 mloc_sub = rend_sub - rstart_sub; 471 ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr); 472 /* create xsub with empty local arrays, because xdup's arrays will be placed into it */ 473 ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr); 474 475 /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it. 476 Note: we use communicator dupcomm, not pc->comm! */ 477 ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr); 478 ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr); 479 480 /* create vec scatters */ 481 if (!red->scatterin){ 482 IS is1,is2; 483 PetscInt *idx1,*idx2,i,j,k; 484 ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr); 485 idx2 = idx1 + red->nsubcomm*mlocal; 486 j = 0; 487 for (k=0; k<red->nsubcomm; k++){ 488 for (i=mstart; i<mend; i++){ 489 idx1[j] = i; 490 idx2[j++] = i + m*k; 491 } 492 } 493 ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr); 494 ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr); 495 ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr); 496 ierr = ISDestroy(is1);CHKERRQ(ierr); 497 ierr = ISDestroy(is2);CHKERRQ(ierr); 498 499 ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr); 500 ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr); 501 ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr); 502 ierr = ISDestroy(is1);CHKERRQ(ierr); 503 ierr = ISDestroy(is2);CHKERRQ(ierr); 504 ierr = PetscFree(idx1);CHKERRQ(ierr); 505 } 506 } 507 ierr = VecDestroy(vec);CHKERRQ(ierr); 508 509 /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */ 510 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 511 if (size == 1) { 512 red->useparallelmat = PETSC_FALSE; 513 } 514 515 if (red->useparallelmat) { 516 if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) { 517 /* destroy old matrices */ 518 if (red->pmats) { 519 ierr = MatDestroy(red->pmats);CHKERRQ(ierr); 520 } 521 } else if (pc->setupcalled == 1) { 522 reuse = MAT_REUSE_MATRIX; 523 str = SAME_NONZERO_PATTERN; 524 } 525 526 /* grab the parallel matrix and put it into processors of a subcomminicator */ 527 /*--------------------------------------------------------------------------*/ 528 ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr); 529 ierr = MatGetRedundantMatrix_AIJ(pc->pmat,red->nsubcomm,red->subcomm,mlocal_sub,reuse,&red->pmats);CHKERRQ(ierr); 530 /* tell PC of the subcommunicator its operators */ 531 ierr = PCSetOperators(red->pc,red->pmats,red->pmats,str);CHKERRQ(ierr); 532 } else { 533 ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr); 534 } 535 ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 536 ierr = PCSetUp(red->pc);CHKERRQ(ierr); 537 PetscFunctionReturn(0); 538 } 539 540 #undef __FUNCT__ 541 #define __FUNCT__ "PCApply_Redundant" 542 static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y) 543 { 544 PC_Redundant *red = (PC_Redundant*)pc->data; 545 PetscErrorCode ierr; 546 PetscScalar *array; 547 548 PetscFunctionBegin; 549 /* scatter x to xdup */ 550 ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 551 ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 552 553 /* place xdup's local array into xsub */ 554 ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr); 555 ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr); 556 557 /* apply preconditioner on each processor */ 558 ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr); 559 ierr = VecResetArray(red->xsub);CHKERRQ(ierr); 560 ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr); 561 562 /* place ysub's local array into ydup */ 563 ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr); 564 ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr); 565 566 /* scatter ydup to y */ 567 ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 568 ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 569 ierr = VecResetArray(red->ydup);CHKERRQ(ierr); 570 ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr); 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "PCDestroy_Redundant" 576 static PetscErrorCode PCDestroy_Redundant(PC pc) 577 { 578 PC_Redundant *red = (PC_Redundant*)pc->data; 579 PetscErrorCode ierr; 580 581 PetscFunctionBegin; 582 if (red->scatterin) {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);} 583 if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);} 584 if (red->ysub) {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);} 585 if (red->xsub) {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);} 586 if (red->xdup) {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);} 587 if (red->ydup) {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);} 588 if (red->pmats) { 589 ierr = MatDestroy(red->pmats);CHKERRQ(ierr); 590 } 591 592 593 ierr = PCDestroy(red->pc);CHKERRQ(ierr); 594 ierr = PetscFree(red);CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 598 #undef __FUNCT__ 599 #define __FUNCT__ "PCSetFromOptions_Redundant" 600 static PetscErrorCode PCSetFromOptions_Redundant(PC pc) 601 { 602 PetscFunctionBegin; 603 PetscFunctionReturn(0); 604 } 605 606 EXTERN_C_BEGIN 607 #undef __FUNCT__ 608 #define __FUNCT__ "PCRedundantSetScatter_Redundant" 609 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out) 610 { 611 PC_Redundant *red = (PC_Redundant*)pc->data; 612 PetscErrorCode ierr; 613 614 PetscFunctionBegin; 615 red->scatterin = in; 616 red->scatterout = out; 617 ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr); 618 ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr); 619 PetscFunctionReturn(0); 620 } 621 EXTERN_C_END 622 623 #undef __FUNCT__ 624 #define __FUNCT__ "PCRedundantSetScatter" 625 /*@ 626 PCRedundantSetScatter - Sets the scatter used to copy values into the 627 redundant local solve and the scatter to move them back into the global 628 vector. 629 630 Collective on PC 631 632 Input Parameters: 633 + pc - the preconditioner context 634 . in - the scatter to move the values in 635 - out - the scatter to move them out 636 637 Level: advanced 638 639 .keywords: PC, redundant solve 640 @*/ 641 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out) 642 { 643 PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter); 644 645 PetscFunctionBegin; 646 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 647 PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2); 648 PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3); 649 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr); 650 if (f) { 651 ierr = (*f)(pc,in,out);CHKERRQ(ierr); 652 } 653 PetscFunctionReturn(0); 654 } 655 656 EXTERN_C_BEGIN 657 #undef __FUNCT__ 658 #define __FUNCT__ "PCRedundantGetPC_Redundant" 659 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc) 660 { 661 PC_Redundant *red = (PC_Redundant*)pc->data; 662 663 PetscFunctionBegin; 664 *innerpc = red->pc; 665 PetscFunctionReturn(0); 666 } 667 EXTERN_C_END 668 669 #undef __FUNCT__ 670 #define __FUNCT__ "PCRedundantGetPC" 671 /*@ 672 PCRedundantGetPC - Gets the sequential PC created by the redundant PC. 673 674 Not Collective 675 676 Input Parameter: 677 . pc - the preconditioner context 678 679 Output Parameter: 680 . innerpc - the sequential PC 681 682 Level: advanced 683 684 .keywords: PC, redundant solve 685 @*/ 686 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc) 687 { 688 PetscErrorCode ierr,(*f)(PC,PC*); 689 690 PetscFunctionBegin; 691 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 692 PetscValidPointer(innerpc,2); 693 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr); 694 if (f) { 695 ierr = (*f)(pc,innerpc);CHKERRQ(ierr); 696 } 697 PetscFunctionReturn(0); 698 } 699 700 EXTERN_C_BEGIN 701 #undef __FUNCT__ 702 #define __FUNCT__ "PCRedundantGetOperators_Redundant" 703 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat) 704 { 705 PC_Redundant *red = (PC_Redundant*)pc->data; 706 707 PetscFunctionBegin; 708 if (mat) *mat = red->pmats; 709 if (pmat) *pmat = red->pmats; 710 PetscFunctionReturn(0); 711 } 712 EXTERN_C_END 713 714 #undef __FUNCT__ 715 #define __FUNCT__ "PCRedundantGetOperators" 716 /*@ 717 PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix 718 719 Not Collective 720 721 Input Parameter: 722 . pc - the preconditioner context 723 724 Output Parameters: 725 + mat - the matrix 726 - pmat - the (possibly different) preconditioner matrix 727 728 Level: advanced 729 730 .keywords: PC, redundant solve 731 @*/ 732 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat) 733 { 734 PetscErrorCode ierr,(*f)(PC,Mat*,Mat*); 735 736 PetscFunctionBegin; 737 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 738 if (mat) PetscValidPointer(mat,2); 739 if (pmat) PetscValidPointer(pmat,3); 740 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr); 741 if (f) { 742 ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr); 743 } 744 PetscFunctionReturn(0); 745 } 746 747 /* -------------------------------------------------------------------------------------*/ 748 /*MC 749 PCREDUNDANT - Runs a preconditioner for the entire problem on each processor 750 751 752 Options for the redundant preconditioners can be set with -redundant_pc_xxx 753 754 Level: intermediate 755 756 757 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(), 758 PCRedundantGetPC(), PCRedundantGetOperators() 759 760 M*/ 761 762 EXTERN_C_BEGIN 763 #undef __FUNCT__ 764 #define __FUNCT__ "PCCreate_Redundant" 765 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc) 766 { 767 PetscErrorCode ierr; 768 PC_Redundant *red; 769 const char *prefix; 770 PetscInt nsubcomm,np_subcomm,nleftover,i,j,color; 771 PetscMPIInt rank,size,subrank,*subsize,duprank; 772 MPI_Comm subcomm=0,dupcomm=0; 773 774 PetscFunctionBegin; 775 ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr); 776 ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr); 777 red->useparallelmat = PETSC_TRUE; 778 779 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 780 ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr); 781 nsubcomm = size; 782 ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr); 783 if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size); 784 785 /*-------------------------------------------------------------------------------------------------- 786 To avoid data scattering from subcomm back to original comm, we create subcomm by iteratively taking a 787 processe into a subcomm. 788 An example: size=4, nsubcomm=3 789 pc->comm: 790 rank: [0] [1] [2] [3] 791 color: 0 1 2 0 792 793 subcomm: 794 subrank: [0] [0] [0] [1] 795 796 dupcomm: 797 duprank: [0] [2] [3] [1] 798 799 Here, subcomm[color = 0] has subsize=2, owns process [0] and [3] 800 subcomm[color = 1] has subsize=1, owns process [1] 801 subcomm[color = 2] has subsize=1, owns process [2] 802 dupcomm has same number of processes as pc->comm, and its duprank maps 803 processes in subcomm contiguously into a 1d array: 804 duprank: [0] [1] [2] [3] 805 rank: [0] [3] [1] [2] 806 subcomm[0] subcomm[1] subcomm[2] 807 ----------------------------------------------------------------------------------------*/ 808 809 /* get size of each subcommunicators */ 810 ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr); 811 np_subcomm = size/nsubcomm; 812 nleftover = size - nsubcomm*np_subcomm; 813 for (i=0; i<nsubcomm; i++){ 814 subsize[i] = np_subcomm; 815 if (i<nleftover) subsize[i]++; 816 } 817 818 /* find color for this proc */ 819 color = rank%nsubcomm; 820 subrank = rank/nsubcomm; 821 822 ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr); 823 red->subcomm = subcomm; 824 red->color = color; 825 red->nsubcomm = nsubcomm; 826 827 j = 0; duprank = 0; 828 for (i=0; i<nsubcomm; i++){ 829 if (j == color){ 830 duprank += subrank; 831 break; 832 } 833 duprank += subsize[i]; j++; 834 } 835 836 /* create dupcomm with same size as comm, but its rank, duprank, maps subcomm's contiguously into dupcomm */ 837 ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr); 838 red->dupcomm = dupcomm; 839 ierr = PetscFree(subsize);CHKERRQ(ierr); 840 /* if (rank == 0) printf("[%d] subrank %d, duprank: %d\n",rank,subrank,duprank); */ 841 842 /* create the sequential PC that each processor has copy of */ 843 ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr); 844 ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr); 845 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 846 ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr); 847 ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr); 848 849 pc->ops->apply = PCApply_Redundant; 850 pc->ops->applytranspose = 0; 851 pc->ops->setup = PCSetUp_Redundant; 852 pc->ops->destroy = PCDestroy_Redundant; 853 pc->ops->setfromoptions = PCSetFromOptions_Redundant; 854 pc->ops->setuponblocks = 0; 855 pc->ops->view = PCView_Redundant; 856 pc->ops->applyrichardson = 0; 857 858 pc->data = (void*)red; 859 860 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant", 861 PCRedundantSetScatter_Redundant);CHKERRQ(ierr); 862 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant", 863 PCRedundantGetPC_Redundant);CHKERRQ(ierr); 864 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant", 865 PCRedundantGetOperators_Redundant);CHKERRQ(ierr); 866 867 PetscFunctionReturn(0); 868 } 869 EXTERN_C_END 870