1 #define PETSCKSP_DLL 2 3 /* 4 This file defines a "solve the problem redundantly on each subgroup of processor" preconditioner. 5 */ 6 #include "private/pcimpl.h" /*I "petscpc.h" I*/ 7 #include "petscksp.h" 8 9 typedef struct { 10 PC pc; /* actual preconditioner used on each processor */ 11 Vec xsub,ysub; /* vectors of a subcommunicator to hold parallel vectors of pc->comm */ 12 Vec xdup,ydup; /* parallel vector that congregates xsub or ysub facilitating vector scattering */ 13 Mat pmats; /* matrix and optional preconditioner matrix belong to a subcommunicator */ 14 VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */ 15 PetscTruth useparallelmat; 16 MPI_Comm subcomm; /* processors belong to a subcommunicator implement a PC in parallel */ 17 MPI_Comm dupcomm; /* processors belong to pc->comm with their rank remapped in the way 18 that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */ 19 PetscInt nsubcomm; /* num of subcommunicators, which equals the num of redundant matrix systems */ 20 PetscInt color; /* color of processors in a subcommunicator */ 21 } PC_Redundant; 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "PCView_Redundant" 25 static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer) 26 { 27 PC_Redundant *red = (PC_Redundant*)pc->data; 28 PetscErrorCode ierr; 29 PetscMPIInt rank; 30 PetscTruth iascii,isstring; 31 PetscViewer sviewer; 32 33 PetscFunctionBegin; 34 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 35 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 36 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr); 37 if (iascii) { 38 ierr = PetscViewerASCIIPrintf(viewer," Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr); 39 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 40 if (!rank) { 41 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 42 ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 43 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 44 } 45 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 46 } else if (isstring) { 47 ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr); 48 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 49 if (!rank) { 50 ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 51 } 52 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 53 } else { 54 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name); 55 } 56 PetscFunctionReturn(0); 57 } 58 59 #include "src/mat/matimpl.h" /*I "petscmat.h" I*/ 60 #include "private/vecimpl.h" 61 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 62 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 63 64 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 65 PetscInt nzlocal,nsends,nrecvs; 66 PetscInt *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j; 67 PetscScalar *sbuf_a,**rbuf_a; 68 PetscErrorCode (*MatDestroy)(Mat); 69 } Mat_Redundant; 70 71 #undef __FUNCT__ 72 #define __FUNCT__ "PetscObjectContainerDestroy_MatRedundant" 73 PetscErrorCode PetscObjectContainerDestroy_MatRedundant(void *ptr) 74 { 75 PetscErrorCode ierr; 76 Mat_Redundant *redund=(Mat_Redundant*)ptr; 77 PetscInt i; 78 79 PetscFunctionBegin; 80 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 81 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 82 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 83 for (i=0; i<redund->nrecvs; i++){ 84 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 85 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 86 } 87 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 88 ierr = PetscFree(redund);CHKERRQ(ierr); 89 PetscFunctionReturn(0); 90 } 91 92 #undef __FUNCT__ 93 #define __FUNCT__ "MatDestroy_MatRedundant" 94 PetscErrorCode MatDestroy_MatRedundant(Mat A) 95 { 96 PetscErrorCode ierr; 97 PetscObjectContainer container; 98 Mat_Redundant *redund=PETSC_NULL; 99 100 PetscFunctionBegin; 101 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 102 if (container) { 103 ierr = PetscObjectContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 104 } else { 105 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 106 } 107 A->ops->destroy = redund->MatDestroy; 108 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 109 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 110 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 111 PetscFunctionReturn(0); 112 } 113 114 #undef __FUNCT__ 115 #define __FUNCT__ "MatGetRedundantMatrix" 116 PetscErrorCode MatGetRedundantMatrix_AIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 117 { 118 PetscMPIInt rank,size; 119 MPI_Comm comm=mat->comm; 120 PetscErrorCode ierr; 121 PetscInt nsends,nrecvs,i,prid=100,rownz_max; 122 PetscMPIInt *send_rank,*recv_rank; 123 PetscInt *rowrange=mat->rmap.range; 124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 125 Mat A=aij->A,B=aij->B,C=*matredundant; 126 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 127 PetscScalar *sbuf_a; 128 PetscInt nzlocal=a->nz+b->nz; 129 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 130 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 131 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 132 PetscScalar *vals,*aworkA,*aworkB; 133 PetscMPIInt tag1,tag2,tag3,imdex; 134 MPI_Request *s_waits1,*s_waits2,*s_waits3,*r_waits1,*r_waits2,*r_waits3; 135 MPI_Status recv_status,*send_status; 136 PetscInt *sbuf_nz,*rbuf_nz,count; 137 PetscInt **rbuf_j; 138 PetscScalar **rbuf_a; 139 Mat_Redundant *redund=PETSC_NULL; 140 PetscObjectContainer container; 141 142 PetscFunctionBegin; 143 ierr = PetscOptionsGetInt(PETSC_NULL,"-prid",&prid,PETSC_NULL);CHKERRQ(ierr); 144 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 145 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 146 147 if (reuse == MAT_REUSE_MATRIX) { 148 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 149 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 150 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 151 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 152 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 153 if (container) { 154 ierr = PetscObjectContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 155 } else { 156 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 157 } 158 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 159 160 nsends = redund->nsends; 161 nrecvs = redund->nrecvs; 162 send_rank = redund->send_rank; recv_rank = send_rank + size; 163 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 164 sbuf_j = redund->sbuf_j; 165 sbuf_a = redund->sbuf_a; 166 rbuf_j = redund->rbuf_j; 167 rbuf_a = redund->rbuf_a; 168 } 169 170 if (reuse == MAT_INITIAL_MATRIX){ 171 PetscMPIInt subrank,subsize; 172 PetscInt nleftover,np_subcomm; 173 /* get the destination processors' id send_rank, nsends and nrecvs */ 174 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 175 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 176 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 177 recv_rank = send_rank + size; 178 np_subcomm = size/nsubcomm; 179 nleftover = size - nsubcomm*np_subcomm; 180 nsends = 0; nrecvs = 0; 181 for (i=0; i<size; i++){ /* i=rank*/ 182 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 183 send_rank[nsends] = i; nsends++; 184 recv_rank[nrecvs++] = i; 185 } 186 } 187 if (rank >= size - nleftover){/* this proc is a leftover processor */ 188 i = size-nleftover-1; 189 j = 0; 190 while (j < nsubcomm - nleftover){ 191 send_rank[nsends++] = i; 192 i--; j++; 193 } 194 } 195 196 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 197 for (i=0; i<nleftover; i++){ 198 recv_rank[nrecvs++] = size-nleftover+i; 199 } 200 } 201 202 /* allocate sbuf_j, sbuf_a */ 203 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 204 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 205 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 206 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 207 208 /* copy mat's local entries into the buffers */ 209 if (reuse == MAT_INITIAL_MATRIX){ 210 rownz_max = 0; 211 rptr = sbuf_j; 212 cols = sbuf_j + rend-rstart + 1; 213 vals = sbuf_a; 214 rptr[0] = 0; 215 for (i=0; i<rend-rstart; i++){ 216 row = i + rstart; 217 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 218 ncols = nzA + nzB; 219 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 220 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 221 /* load the column indices for this row into cols */ 222 lwrite = 0; 223 for (l=0; l<nzB; l++) { 224 if ((ctmp = bmap[cworkB[l]]) < cstart){ 225 vals[lwrite] = aworkB[l]; 226 cols[lwrite++] = ctmp; 227 } 228 } 229 for (l=0; l<nzA; l++){ 230 vals[lwrite] = aworkA[l]; 231 cols[lwrite++] = cstart + cworkA[l]; 232 } 233 for (l=0; l<nzB; l++) { 234 if ((ctmp = bmap[cworkB[l]]) >= cend){ 235 vals[lwrite] = aworkB[l]; 236 cols[lwrite++] = ctmp; 237 } 238 } 239 vals += ncols; 240 cols += ncols; 241 rptr[i+1] = rptr[i] + ncols; 242 if (rownz_max < ncols) rownz_max = ncols; 243 } 244 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 245 } else { /* only copy matrix values into sbuf_a */ 246 rptr = sbuf_j; 247 vals = sbuf_a; 248 rptr[0] = 0; 249 for (i=0; i<rend-rstart; i++){ 250 row = i + rstart; 251 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 252 ncols = nzA + nzB; 253 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 254 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 255 lwrite = 0; 256 for (l=0; l<nzB; l++) { 257 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 258 } 259 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 260 for (l=0; l<nzB; l++) { 261 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 262 } 263 vals += ncols; 264 rptr[i+1] = rptr[i] + ncols; 265 } 266 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 267 268 /* send nzlocal to others, and recv other's nzlocal */ 269 /*--------------------------------------------------*/ 270 if (reuse == MAT_INITIAL_MATRIX){ 271 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 272 s_waits2 = s_waits3 + nsends; 273 s_waits1 = s_waits2 + nsends; 274 r_waits1 = s_waits1 + nsends; 275 r_waits2 = r_waits1 + nrecvs; 276 r_waits3 = r_waits2 + nrecvs; 277 } else { 278 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 279 r_waits3 = s_waits3 + nsends; 280 } 281 282 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 283 if (reuse == MAT_INITIAL_MATRIX){ 284 /* get new tags to keep the communication clean */ 285 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 286 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 287 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 288 rbuf_nz = sbuf_nz + nsends; 289 290 /* post receives of other's nzlocal */ 291 for (i=0; i<nrecvs; i++){ 292 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 293 } 294 /* send nzlocal to others */ 295 for (i=0; i<nsends; i++){ 296 sbuf_nz[i] = nzlocal; 297 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 298 } 299 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 300 count = nrecvs; 301 while (count) { 302 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 303 recv_rank[imdex] = recv_status.MPI_SOURCE; 304 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 305 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 306 307 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 308 rbuf_nz[imdex] += i + 2; 309 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 310 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 311 count--; 312 } 313 /* wait on sends of nzlocal */ 314 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 315 /* send mat->i,j to others, and recv from other's */ 316 /*------------------------------------------------*/ 317 for (i=0; i<nsends; i++){ 318 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 319 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 320 } 321 /* wait on receives of mat->i,j */ 322 /*------------------------------*/ 323 count = nrecvs; 324 while (count) { 325 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 326 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 327 count--; 328 } 329 /* wait on sends of mat->i,j */ 330 /*---------------------------*/ 331 if (nsends) { 332 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 333 } 334 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 335 336 /* post receives, send and receive mat->a */ 337 /*----------------------------------------*/ 338 for (imdex=0; imdex<nrecvs; imdex++) { 339 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 340 } 341 for (i=0; i<nsends; i++){ 342 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 343 } 344 count = nrecvs; 345 while (count) { 346 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 347 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 348 count--; 349 } 350 if (nsends) { 351 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 352 } 353 354 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 355 356 /* create redundant matrix */ 357 /*-------------------------*/ 358 if (reuse == MAT_INITIAL_MATRIX){ 359 /* compute rownz_max for preallocation */ 360 for (imdex=0; imdex<nrecvs; imdex++){ 361 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 362 rptr = rbuf_j[imdex]; 363 for (i=0; i<j; i++){ 364 ncols = rptr[i+1] - rptr[i]; 365 if (rownz_max < ncols) rownz_max = ncols; 366 } 367 } 368 369 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 370 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 371 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 372 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 373 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 374 } else { 375 C = *matredundant; 376 } 377 378 /* insert local matrix entries */ 379 rptr = sbuf_j; 380 cols = sbuf_j + rend-rstart + 1; 381 vals = sbuf_a; 382 for (i=0; i<rend-rstart; i++){ 383 row = i + rstart; 384 ncols = rptr[i+1] - rptr[i]; 385 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 386 vals += ncols; 387 cols += ncols; 388 } 389 /* insert received matrix entries */ 390 for (imdex=0; imdex<nrecvs; imdex++){ 391 rstart = rowrange[recv_rank[imdex]]; 392 rend = rowrange[recv_rank[imdex]+1]; 393 rptr = rbuf_j[imdex]; 394 cols = rbuf_j[imdex] + rend-rstart + 1; 395 vals = rbuf_a[imdex]; 396 for (i=0; i<rend-rstart; i++){ 397 row = i + rstart; 398 ncols = rptr[i+1] - rptr[i]; 399 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 400 vals += ncols; 401 cols += ncols; 402 } 403 } 404 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 405 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 406 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 407 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 408 if (reuse == MAT_INITIAL_MATRIX){ 409 PetscObjectContainer container; 410 *matredundant = C; 411 /* create a supporting struct and attach it to C for reuse */ 412 ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr); 413 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 414 ierr = PetscObjectContainerSetPointer(container,redund);CHKERRQ(ierr); 415 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 416 ierr = PetscObjectContainerSetUserDestroy(container,PetscObjectContainerDestroy_MatRedundant);CHKERRQ(ierr); 417 418 redund->nzlocal = nzlocal; 419 redund->nsends = nsends; 420 redund->nrecvs = nrecvs; 421 redund->send_rank = send_rank; 422 redund->sbuf_nz = sbuf_nz; 423 redund->sbuf_j = sbuf_j; 424 redund->sbuf_a = sbuf_a; 425 redund->rbuf_j = rbuf_j; 426 redund->rbuf_a = rbuf_a; 427 428 redund->MatDestroy = C->ops->destroy; 429 C->ops->destroy = MatDestroy_MatRedundant; 430 } 431 PetscFunctionReturn(0); 432 } 433 434 #undef __FUNCT__ 435 #define __FUNCT__ "PCSetUp_Redundant" 436 static PetscErrorCode PCSetUp_Redundant(PC pc) 437 { 438 PC_Redundant *red = (PC_Redundant*)pc->data; 439 PetscErrorCode ierr; 440 PetscInt mstart,mend,mlocal,m; 441 PetscMPIInt size; 442 IS isl; 443 MatReuse reuse = MAT_INITIAL_MATRIX; 444 MatStructure str = DIFFERENT_NONZERO_PATTERN; 445 MPI_Comm comm; 446 Vec vec; 447 448 PetscInt mlocal_sub; 449 PetscMPIInt subsize,subrank; 450 PetscInt rstart_sub,rend_sub,mloc_sub; 451 452 PetscFunctionBegin; 453 ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr); 454 ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr); 455 ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 456 ierr = VecGetSize(vec,&m);CHKERRQ(ierr); 457 if (!pc->setupcalled) { 458 /* create working vectors xsub/ysub and xdup/ydup */ 459 ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr); 460 ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr); 461 462 /* get local size of xsub/ysub */ 463 ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr); 464 ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr); 465 rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */ 466 if (subrank+1 < subsize){ 467 rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)]; 468 } else { 469 rend_sub = m; 470 } 471 mloc_sub = rend_sub - rstart_sub; 472 ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr); 473 /* create xsub with empty local arrays, because xdup's arrays will be placed into it */ 474 ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr); 475 476 /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it. 477 Note: we use communicator dupcomm, not pc->comm! */ 478 ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr); 479 ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr); 480 481 /* create vec scatters */ 482 if (!red->scatterin){ 483 IS is1,is2; 484 PetscInt *idx1,*idx2,i,j,k; 485 ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr); 486 idx2 = idx1 + red->nsubcomm*mlocal; 487 j = 0; 488 for (k=0; k<red->nsubcomm; k++){ 489 for (i=mstart; i<mend; i++){ 490 idx1[j] = i; 491 idx2[j++] = i + m*k; 492 } 493 } 494 ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr); 495 ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr); 496 ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr); 497 ierr = ISDestroy(is1);CHKERRQ(ierr); 498 ierr = ISDestroy(is2);CHKERRQ(ierr); 499 500 ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr); 501 ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr); 502 ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr); 503 ierr = ISDestroy(is1);CHKERRQ(ierr); 504 ierr = ISDestroy(is2);CHKERRQ(ierr); 505 ierr = PetscFree(idx1);CHKERRQ(ierr); 506 } 507 } 508 ierr = VecDestroy(vec);CHKERRQ(ierr); 509 510 /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */ 511 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 512 if (size == 1) { 513 red->useparallelmat = PETSC_FALSE; 514 } 515 516 if (red->useparallelmat) { 517 if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) { 518 /* destroy old matrices */ 519 if (red->pmats) { 520 //ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr); 521 ierr = MatDestroy(red->pmats);CHKERRQ(ierr); 522 } 523 } else if (pc->setupcalled == 1) { 524 reuse = MAT_REUSE_MATRIX; 525 str = SAME_NONZERO_PATTERN; 526 } 527 528 /* grab the parallel matrix and put it into processors of a subcomminicator */ 529 /*--------------------------------------------------------------------------*/ 530 ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr); 531 ierr = MatGetRedundantMatrix_AIJ(pc->pmat,red->nsubcomm,red->subcomm,mlocal_sub,reuse,&red->pmats);CHKERRQ(ierr); 532 /* tell PC of the subcommunicator its operators */ 533 ierr = PCSetOperators(red->pc,red->pmats,red->pmats,str);CHKERRQ(ierr); 534 } else { 535 ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr); 536 } 537 ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 538 ierr = PCSetUp(red->pc);CHKERRQ(ierr); 539 PetscFunctionReturn(0); 540 } 541 542 #undef __FUNCT__ 543 #define __FUNCT__ "PCApply_Redundant" 544 static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y) 545 { 546 PC_Redundant *red = (PC_Redundant*)pc->data; 547 PetscErrorCode ierr; 548 PetscScalar *array; 549 550 PetscFunctionBegin; 551 /* scatter x to xdup */ 552 ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 553 ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 554 555 /* place xdup's local array into xsub */ 556 ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr); 557 ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr); 558 559 /* apply preconditioner on each processor */ 560 ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr); 561 ierr = VecResetArray(red->xsub);CHKERRQ(ierr); 562 ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr); 563 564 /* place ysub's local array into ydup */ 565 ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr); 566 ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr); 567 568 /* scatter ydup to y */ 569 ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 570 ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 571 ierr = VecResetArray(red->ydup);CHKERRQ(ierr); 572 ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr); 573 PetscFunctionReturn(0); 574 } 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "PCDestroy_Redundant" 578 static PetscErrorCode PCDestroy_Redundant(PC pc) 579 { 580 PC_Redundant *red = (PC_Redundant*)pc->data; 581 PetscErrorCode ierr; 582 583 PetscFunctionBegin; 584 if (red->scatterin) {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);} 585 if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);} 586 if (red->ysub) {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);} 587 if (red->xsub) {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);} 588 if (red->xdup) {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);} 589 if (red->ydup) {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);} 590 if (red->pmats) { 591 ierr = MatDestroy(red->pmats);CHKERRQ(ierr); 592 } 593 594 595 ierr = PCDestroy(red->pc);CHKERRQ(ierr); 596 ierr = PetscFree(red);CHKERRQ(ierr); 597 PetscFunctionReturn(0); 598 } 599 600 #undef __FUNCT__ 601 #define __FUNCT__ "PCSetFromOptions_Redundant" 602 static PetscErrorCode PCSetFromOptions_Redundant(PC pc) 603 { 604 PetscFunctionBegin; 605 PetscFunctionReturn(0); 606 } 607 608 EXTERN_C_BEGIN 609 #undef __FUNCT__ 610 #define __FUNCT__ "PCRedundantSetScatter_Redundant" 611 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out) 612 { 613 PC_Redundant *red = (PC_Redundant*)pc->data; 614 PetscErrorCode ierr; 615 616 PetscFunctionBegin; 617 red->scatterin = in; 618 red->scatterout = out; 619 ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr); 620 ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr); 621 PetscFunctionReturn(0); 622 } 623 EXTERN_C_END 624 625 #undef __FUNCT__ 626 #define __FUNCT__ "PCRedundantSetScatter" 627 /*@ 628 PCRedundantSetScatter - Sets the scatter used to copy values into the 629 redundant local solve and the scatter to move them back into the global 630 vector. 631 632 Collective on PC 633 634 Input Parameters: 635 + pc - the preconditioner context 636 . in - the scatter to move the values in 637 - out - the scatter to move them out 638 639 Level: advanced 640 641 .keywords: PC, redundant solve 642 @*/ 643 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out) 644 { 645 PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter); 646 647 PetscFunctionBegin; 648 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 649 PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2); 650 PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3); 651 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr); 652 if (f) { 653 ierr = (*f)(pc,in,out);CHKERRQ(ierr); 654 } 655 PetscFunctionReturn(0); 656 } 657 658 EXTERN_C_BEGIN 659 #undef __FUNCT__ 660 #define __FUNCT__ "PCRedundantGetPC_Redundant" 661 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc) 662 { 663 PC_Redundant *red = (PC_Redundant*)pc->data; 664 665 PetscFunctionBegin; 666 *innerpc = red->pc; 667 PetscFunctionReturn(0); 668 } 669 EXTERN_C_END 670 671 #undef __FUNCT__ 672 #define __FUNCT__ "PCRedundantGetPC" 673 /*@ 674 PCRedundantGetPC - Gets the sequential PC created by the redundant PC. 675 676 Not Collective 677 678 Input Parameter: 679 . pc - the preconditioner context 680 681 Output Parameter: 682 . innerpc - the sequential PC 683 684 Level: advanced 685 686 .keywords: PC, redundant solve 687 @*/ 688 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc) 689 { 690 PetscErrorCode ierr,(*f)(PC,PC*); 691 692 PetscFunctionBegin; 693 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 694 PetscValidPointer(innerpc,2); 695 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr); 696 if (f) { 697 ierr = (*f)(pc,innerpc);CHKERRQ(ierr); 698 } 699 PetscFunctionReturn(0); 700 } 701 702 EXTERN_C_BEGIN 703 #undef __FUNCT__ 704 #define __FUNCT__ "PCRedundantGetOperators_Redundant" 705 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat) 706 { 707 PC_Redundant *red = (PC_Redundant*)pc->data; 708 709 PetscFunctionBegin; 710 if (mat) *mat = red->pmats; 711 if (pmat) *pmat = red->pmats; 712 PetscFunctionReturn(0); 713 } 714 EXTERN_C_END 715 716 #undef __FUNCT__ 717 #define __FUNCT__ "PCRedundantGetOperators" 718 /*@ 719 PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix 720 721 Not Collective 722 723 Input Parameter: 724 . pc - the preconditioner context 725 726 Output Parameters: 727 + mat - the matrix 728 - pmat - the (possibly different) preconditioner matrix 729 730 Level: advanced 731 732 .keywords: PC, redundant solve 733 @*/ 734 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat) 735 { 736 PetscErrorCode ierr,(*f)(PC,Mat*,Mat*); 737 738 PetscFunctionBegin; 739 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 740 if (mat) PetscValidPointer(mat,2); 741 if (pmat) PetscValidPointer(pmat,3); 742 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr); 743 if (f) { 744 ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr); 745 } 746 PetscFunctionReturn(0); 747 } 748 749 /* -------------------------------------------------------------------------------------*/ 750 /*MC 751 PCREDUNDANT - Runs a preconditioner for the entire problem on each processor 752 753 754 Options for the redundant preconditioners can be set with -redundant_pc_xxx 755 756 Level: intermediate 757 758 759 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(), 760 PCRedundantGetPC(), PCRedundantGetOperators() 761 762 M*/ 763 764 EXTERN_C_BEGIN 765 #undef __FUNCT__ 766 #define __FUNCT__ "PCCreate_Redundant" 767 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc) 768 { 769 PetscErrorCode ierr; 770 PC_Redundant *red; 771 const char *prefix; 772 773 PetscInt nsubcomm,np_subcomm,nleftover,i,j,color; 774 PetscMPIInt rank,size,subrank,*subsize; 775 MPI_Comm subcomm; 776 PetscMPIInt duprank; 777 PetscMPIInt rank_dup,size_dup; 778 MPI_Comm dupcomm; 779 780 PetscFunctionBegin; 781 ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr); 782 ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr); 783 red->useparallelmat = PETSC_TRUE; 784 785 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 786 ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr); 787 nsubcomm = size; 788 ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr); 789 if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size); 790 791 /* get size of each subcommunicators */ 792 ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr); 793 np_subcomm = size/nsubcomm; 794 nleftover = size - nsubcomm*np_subcomm; 795 for (i=0; i<nsubcomm; i++){ 796 subsize[i] = np_subcomm; 797 if (i<nleftover) subsize[i]++; 798 } 799 800 /* find color for this proc */ 801 color = rank%nsubcomm; 802 subrank = rank/nsubcomm; 803 804 ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr); 805 red->subcomm = subcomm; 806 red->color = color; 807 red->nsubcomm = nsubcomm; 808 809 j = 0; duprank = 0; 810 for (i=0; i<nsubcomm; i++){ 811 if (j == color){ 812 duprank += subrank; 813 break; 814 } 815 duprank += subsize[i]; j++; 816 } 817 /* 818 ierr = PetscSynchronizedPrintf(pc->comm, "[%d] color %d, subrank %d, duprank %d\n",rank,color,subrank,duprank); 819 ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr); 820 */ 821 822 ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr); 823 ierr = MPI_Comm_rank(dupcomm,&rank_dup);CHKERRQ(ierr); 824 ierr = MPI_Comm_size(dupcomm,&size_dup);CHKERRQ(ierr); 825 /* 826 ierr = PetscSynchronizedPrintf(pc->comm, "[%d] duprank %d\n",rank,duprank); 827 ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr); 828 */ 829 red->dupcomm = dupcomm; 830 ierr = PetscFree(subsize);CHKERRQ(ierr); 831 832 /* create the sequential PC that each processor has copy of */ 833 ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr); 834 ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr); 835 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 836 ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr); 837 ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr); 838 839 pc->ops->apply = PCApply_Redundant; 840 pc->ops->applytranspose = 0; 841 pc->ops->setup = PCSetUp_Redundant; 842 pc->ops->destroy = PCDestroy_Redundant; 843 pc->ops->setfromoptions = PCSetFromOptions_Redundant; 844 pc->ops->setuponblocks = 0; 845 pc->ops->view = PCView_Redundant; 846 pc->ops->applyrichardson = 0; 847 848 pc->data = (void*)red; 849 850 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant", 851 PCRedundantSetScatter_Redundant);CHKERRQ(ierr); 852 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant", 853 PCRedundantGetPC_Redundant);CHKERRQ(ierr); 854 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant", 855 PCRedundantGetOperators_Redundant);CHKERRQ(ierr); 856 857 PetscFunctionReturn(0); 858 } 859 EXTERN_C_END 860