#define PETSCKSP_DLL

/*
   This file defines a "solve the problem redundantly on each subgroup of processors" preconditioner.
*/
#include "private/pcimpl.h"     /*I "petscpc.h" I*/
#include "petscksp.h"

typedef struct {
  PC         pc;                   /* actual preconditioner used on each processor */
  Vec        xsub,ysub;            /* vectors of a subcommunicator that hold the parallel vectors of pc->comm */
  Vec        xdup,ydup;            /* parallel vectors that congregate xsub or ysub, facilitating vector scattering */
  Mat        pmats;                /* matrix and optional preconditioner matrix belonging to a subcommunicator */
  VecScatter scatterin,scatterout; /* scatters used to move all values to each processor group (subcommunicator) */
  PetscTruth useparallelmat;
  MPI_Comm   subcomm;              /* processors belonging to a subcommunicator implement a PC in parallel */
  MPI_Comm   dupcomm;              /* processors belonging to pc->comm with their ranks remapped so that
                                      vector xdup/ydup has contiguous ranks while appending xsub/ysub along their colors */
  PetscInt   nsubcomm;             /* number of subcommunicators, which equals the number of redundant matrix systems */
  PetscInt   color;                /* color of the processors in a subcommunicator */
} PC_Redundant;

#undef __FUNCT__
#define __FUNCT__ "PCView_Redundant"
static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer)
{
  PC_Redundant   *red = (PC_Redundant*)pc->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscTruth     iascii,isstring;
  PetscViewer    sviewer,subviewer;
  PetscInt       color = red->color;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerASCIIPrintf(viewer," Redundant solver preconditioner: First PC (color=0) follows\n");CHKERRQ(ierr);
    ierr = PetscViewerGetSubcomm(viewer,red->pc->comm,&subviewer);CHKERRQ(ierr);
    if (!color) { /* only view the first redundant pc */
      ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
      ierr = PCView(red->pc,subviewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubcomm(viewer,red->pc->comm,&subviewer);CHKERRQ(ierr);
  } else if (isstring) { /* not tested yet! */
    ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PCView(red->pc,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
#include "include/private/matimpl.h"      /*I "petscmat.h" I*/
#include "private/vecimpl.h"
#include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/
#include "src/mat/impls/aij/seq/aij.h"    /*I "petscmat.h" I*/

typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;
  PetscInt       *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j;
  PetscScalar    *sbuf_a,**rbuf_a;
  PetscErrorCode (*MatDestroy)(Mat);
} Mat_Redundant;

#undef __FUNCT__
#define __FUNCT__ "PetscContainerDestroy_MatRedundant"
PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
{
  PetscErrorCode ierr;
  Mat_Redundant  *redund=(Mat_Redundant*)ptr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
  for (i=0; i<redund->nrecvs; i++){
    ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
    ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
  ierr = PetscFree(redund);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatRedundant"
PetscErrorCode MatDestroy_MatRedundant(Mat A)
{
  PetscErrorCode ierr;
  PetscContainer container;
  Mat_Redundant  *redund=PETSC_NULL;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
  }
  A->ops->destroy = redund->MatDestroy;
  ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_AIJ"
PetscErrorCode MatGetRedundantMatrix_AIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=mat->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap.range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  PetscScalar    *vals,*aworkA,*aworkB;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
    }
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");

    nsends    = redund->nsends;
    nrecvs    = redund->nrecvs;
    send_rank = redund->send_rank; recv_rank = send_rank + size;
    sbuf_nz   = redund->sbuf_nz;   rbuf_nz   = sbuf_nz + nsends;
    sbuf_j    = redund->sbuf_j;
    sbuf_a    = redund->sbuf_a;
    rbuf_j    = redund->rbuf_j;
    rbuf_a    = redund->rbuf_a;
  }

  if (reuse == MAT_INITIAL_MATRIX){
    PetscMPIInt subrank,subsize;
    PetscInt    nleftover,np_subcomm;
    /* get the destination processors' ids send_rank, nsends and nrecvs */
    ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
    ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
    recv_rank  = send_rank + size;
    np_subcomm = size/nsubcomm;
    nleftover  = size - nsubcomm*np_subcomm;
    nsends = 0; nrecvs = 0;
    for (i=0; i<size; i++){ /* i = rank */
      if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
        send_rank[nsends] = i; nsends++;
        recv_rank[nrecvs++] = i;
      }
    }
    if (rank >= size - nleftover){ /* this proc is a leftover processor */
      i = size-nleftover-1;
      j = 0;
      while (j < nsubcomm - nleftover){
        send_rank[nsends++] = i;
        i--; j++;
      }
    }

    if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
      for (i=0; i<nleftover; i++){
        recv_rank[nrecvs++] = size-nleftover+i;
      }
    }

    /* allocate sbuf_j, sbuf_a */
    i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
    ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
    ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
  } /* end of if (reuse == MAT_INITIAL_MATRIX) */

  /* copy mat's local entries into the buffers */
  if (reuse == MAT_INITIAL_MATRIX){
    rownz_max = 0;
    rptr = sbuf_j;
    cols = sbuf_j + rend-rstart + 1;
    vals = sbuf_a;
    rptr[0] = 0;
    for (i=0; i<rend-rstart; i++){
      row = i + rstart;
      nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols = nzA + nzB;
      cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
      /* load the column indices for this row into cols */
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart){
          vals[lwrite]   = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      for (l=0; l<nzA; l++){
        vals[lwrite]   = aworkA[l];
        cols[lwrite++] = cstart + cworkA[l];
      }
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend){
          vals[lwrite]   = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      vals += ncols;
      cols += ncols;
      rptr[i+1] = rptr[i] + ncols;
      if (rownz_max < ncols) rownz_max = ncols;
    }
    if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1,"rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
  } else { /* only copy matrix values into sbuf_a */
    rptr = sbuf_j;
    vals = sbuf_a;
    rptr[0] = 0;
    for (i=0; i<rend-rstart; i++){
      row = i + rstart;
      nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols = nzA + nzB;
      cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
      }
      for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
      }
      vals += ncols;
      rptr[i+1] = rptr[i] + ncols;
    }
  } /* end of if (reuse == MAT_INITIAL_MATRIX) */

  /* send nzlocal to others, and receive others' nzlocal */
  /*-----------------------------------------------------*/
  if (reuse == MAT_INITIAL_MATRIX){
    ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    s_waits2 = s_waits3 + nsends;
    s_waits1 = s_waits2 + nsends;
    r_waits1 = s_waits1 + nsends;
    r_waits2 = r_waits1 + nrecvs;
    r_waits3 = r_waits2 + nrecvs;
  } else {
    ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    r_waits3 = s_waits3 + nsends;
  }

  ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX){
    /* get new tags to keep the communication clean */
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
    ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
    rbuf_nz = sbuf_nz + nsends;

    /* post receives of others' nzlocal */
    for (i=0; i<nrecvs; i++){
      ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
    }
    /* send nzlocal to others */
    for (i=0; i<nsends; i++){
      sbuf_nz[i] = nzlocal;
      ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
    }
    /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
      recv_rank[imdex] = recv_status.MPI_SOURCE;
      /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
      ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);

      i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
      rbuf_nz[imdex] += i + 2;
      ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
      ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
      count--;
    }
    /* wait on sends of nzlocal */
    if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
    /* send mat->i,j to others, and receive them from others */
    /*--------------------------------------------------------*/
    for (i=0; i<nsends; i++){
      j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
      ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
    }
    /* wait on receives of mat->i,j */
    /*------------------------------*/
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
      if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
      count--;
    }
    /* wait on sends of mat->i,j */
    /*---------------------------*/
    if (nsends) {
      ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
    }
  } /* end of if (reuse == MAT_INITIAL_MATRIX) */

  /* post receives, send and receive mat->a */
  /*----------------------------------------*/
  for (imdex=0; imdex<nrecvs; imdex++) {
    ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
  }
  for (i=0; i<nsends; i++){
    ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
  }
  count = nrecvs;
  while (count) {
    ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
    if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
    count--;
  }
  if (nsends) {
    ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
  }

  ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);

  /* create redundant matrix */
  /*-------------------------*/
  if (reuse == MAT_INITIAL_MATRIX){
    /* compute rownz_max for preallocation */
    for (imdex=0; imdex<nrecvs; imdex++){
      j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
      rptr = rbuf_j[imdex];
      for (i=0; i<j; i++){
        ncols = rptr[i+1] - rptr[i];
        if (rownz_max < ncols) rownz_max = ncols;
      }
    }

    ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
    ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetFromOptions(C);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
  } else {
    C = *matredundant;
  }

  /* insert local matrix entries */
  rptr = sbuf_j;
  cols = sbuf_j + rend-rstart + 1;
  vals = sbuf_a;
  for (i=0; i<rend-rstart; i++){
    row   = i + rstart;
    ncols = rptr[i+1] - rptr[i];
    ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    vals += ncols;
    cols += ncols;
  }
  /* insert received matrix entries */
  for (imdex=0; imdex<nrecvs; imdex++){
    rstart = rowrange[recv_rank[imdex]];
    rend   = rowrange[recv_rank[imdex]+1];
    rptr   = rbuf_j[imdex];
    cols   = rbuf_j[imdex] + rend-rstart + 1;
    vals   = rbuf_a[imdex];
    for (i=0; i<rend-rstart; i++){
      row   = i + rstart;
      ncols = rptr[i+1] - rptr[i];
      ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      vals += ncols;
      cols += ncols;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
  if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
  if (reuse == MAT_INITIAL_MATRIX){
    PetscContainer container;
    *matredundant = C;
    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr);
    ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
    ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);

    redund->nzlocal   = nzlocal;
    redund->nsends    = nsends;
    redund->nrecvs    = nrecvs;
    redund->send_rank = send_rank;
    redund->sbuf_nz   = sbuf_nz;
    redund->sbuf_j    = sbuf_j;
    redund->sbuf_a    = sbuf_a;
    redund->rbuf_j    = rbuf_j;
    redund->rbuf_a    = rbuf_a;

    redund->MatDestroy = C->ops->destroy;
    C->ops->destroy    = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetUp_Redundant"
static PetscErrorCode PCSetUp_Redundant(PC pc)
{
  PC_Redundant   *red = (PC_Redundant*)pc->data;
  PetscErrorCode ierr;
  PetscInt       mstart,mend,mlocal,m;
  PetscMPIInt    size;
  MatReuse       reuse = MAT_INITIAL_MATRIX;
  MatStructure   str = DIFFERENT_NONZERO_PATTERN;
  MPI_Comm       comm;
  Vec            vec;

  PetscInt       mlocal_sub;
  PetscMPIInt    subsize,subrank;
  PetscInt       rstart_sub,rend_sub,mloc_sub;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr);
  ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
  ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
  ierr = VecGetSize(vec,&m);CHKERRQ(ierr);
  if (!pc->setupcalled) {
    /* create working vectors xsub/ysub and xdup/ydup */
    ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr);

    /* get local size of xsub/ysub */
    ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr);
    rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */
    if (subrank+1 < subsize){
      rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)];
    } else {
      rend_sub = m;
    }
    mloc_sub = rend_sub - rstart_sub;
    ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr);
    /* create xsub with empty local arrays, because xdup's arrays will be placed into it */
    ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr);

    /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be placed into it.
       Note: we use the communicator dupcomm, not pc->comm! */
    ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr);

    /* create vec scatters */
    if (!red->scatterin){
      IS       is1,is2;
      PetscInt *idx1,*idx2,i,j,k;
      ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr);
      idx2 = idx1 + red->nsubcomm*mlocal;
      j = 0;
      for (k=0; k<red->nsubcomm; k++){
        for (i=mstart; i<mend; i++){
          idx1[j]   = i;
          idx2[j++] = i + m*k;
        }
      }
      ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr);
      ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr);
      ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr);
      ierr = ISDestroy(is1);CHKERRQ(ierr);
      ierr = ISDestroy(is2);CHKERRQ(ierr);

      ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr);
      ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr);
      ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr);
      ierr = ISDestroy(is1);CHKERRQ(ierr);
      ierr = ISDestroy(is2);CHKERRQ(ierr);
      ierr = PetscFree(idx1);CHKERRQ(ierr);
    }
  }
  ierr = VecDestroy(vec);CHKERRQ(ierr);

  /* if the preconditioner matrix set by the user is sequential, we do not need to gather the parallel matrix */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    red->useparallelmat = PETSC_FALSE;
  }

  if (red->useparallelmat) {
    if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) {
      /* destroy old matrices */
      if (red->pmats) {
        ierr = MatDestroy(red->pmats);CHKERRQ(ierr);
      }
    } else if (pc->setupcalled == 1) {
      reuse = MAT_REUSE_MATRIX;
      str   = SAME_NONZERO_PATTERN;
    }

    /* grab the parallel matrix and put it into the processors of a subcommunicator */
    /*------------------------------------------------------------------------------*/
    ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr);
    ierr = MatGetRedundantMatrix_AIJ(pc->pmat,red->nsubcomm,red->subcomm,mlocal_sub,reuse,&red->pmats);CHKERRQ(ierr);
    /* tell the PC of the subcommunicator its operators */
    ierr = PCSetOperators(red->pc,red->pmats,red->pmats,str);CHKERRQ(ierr);
  } else {
    ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr);
  }
  ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
  ierr = PCSetUp(red->pc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCApply_Redundant"
static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y)
{
  PC_Redundant   *red = (PC_Redundant*)pc->data;
  PetscErrorCode ierr;
  PetscScalar    *array;

  PetscFunctionBegin;
  /* scatter x to xdup */
  ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
  ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);

  /* place xdup's local array into xsub */
  ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr);

  /* apply the preconditioner on each subcommunicator */
  ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr);
  ierr = VecResetArray(red->xsub);CHKERRQ(ierr);
  ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr);

  /* place ysub's local array into ydup */
  ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr);

  /* scatter ydup to y */
  ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
  ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
  ierr = VecResetArray(red->ydup);CHKERRQ(ierr);
  ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
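/*
   A minimal sketch (not part of the library code) of the VecPlaceArray()/VecResetArray() idiom used
   in PCApply_Redundant() above: a vector created with VecCreateMPIWithArray() and a PETSC_NULL array
   temporarily borrows the local storage of another vector, avoiding a copy.  The names xdup, xsub,
   dupcomm, subcomm and nlocal are placeholders mirroring the PC_Redundant fields.

     PetscScalar *array;
     Vec         xdup,xsub;
     ierr = VecCreateMPI(dupcomm,nlocal,PETSC_DECIDE,&xdup);CHKERRQ(ierr);                       (owns storage)
     ierr = VecCreateMPIWithArray(subcomm,nlocal,PETSC_DECIDE,PETSC_NULL,&xsub);CHKERRQ(ierr);   (no storage yet)
     ierr = VecGetArray(xdup,&array);CHKERRQ(ierr);
     ierr = VecPlaceArray(xsub,array);CHKERRQ(ierr);        (xsub now aliases xdup's local entries)
        ... use xsub ...
     ierr = VecResetArray(xsub);CHKERRQ(ierr);
     ierr = VecRestoreArray(xdup,&array);CHKERRQ(ierr);
*/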
#undef __FUNCT__
#define __FUNCT__ "PCDestroy_Redundant"
static PetscErrorCode PCDestroy_Redundant(PC pc)
{
  PC_Redundant   *red = (PC_Redundant*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (red->scatterin)  {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);}
  if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);}
  if (red->ysub)       {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);}
  if (red->xsub)       {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);}
  if (red->xdup)       {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);}
  if (red->ydup)       {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);}
  if (red->pmats) {
    ierr = MatDestroy(red->pmats);CHKERRQ(ierr);
  }

  ierr = PCDestroy(red->pc);CHKERRQ(ierr);
  ierr = PetscFree(red);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetFromOptions_Redundant"
static PetscErrorCode PCSetFromOptions_Redundant(PC pc)
{
  PetscErrorCode ierr;
  PC_Redundant   *red = (PC_Redundant*)pc->data;

  PetscFunctionBegin;
  ierr = PetscOptionsHead("Redundant options");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-pc_redundant_number_comm","Number of subcommunicators","PCRedundantSetNumComm",red->nsubcomm,&red->nsubcomm,0);CHKERRQ(ierr);
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCRedundantSetScatter_Redundant"
PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out)
{
  PC_Redundant   *red = (PC_Redundant*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  red->scatterin  = in;
  red->scatterout = out;
  ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr);
  ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "PCRedundantSetScatter"
/*@
   PCRedundantSetScatter - Sets the scatter used to copy values into the
     redundant local solve and the scatter used to move them back into the
     global vector.

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
.  in - the scatter to move the values in
-  out - the scatter to move them out

   Level: advanced

.keywords: PC, redundant solve
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out)
{
  PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2);
  PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,in,out);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCRedundantGetPC_Redundant"
PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc)
{
  PC_Redundant *red = (PC_Redundant*)pc->data;

  PetscFunctionBegin;
  *innerpc = red->pc;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "PCRedundantGetPC"
/*@
   PCRedundantGetPC - Gets the PC created by the redundant PC; this inner PC runs on each subcommunicator.

   Not Collective

   Input Parameter:
.  pc - the preconditioner context

   Output Parameter:
.  innerpc - the inner PC

   Level: advanced

.keywords: PC, redundant solve
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc)
{
  PetscErrorCode ierr,(*f)(PC,PC*);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  PetscValidPointer(innerpc,2);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,innerpc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
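/*
   Example (a sketch, not compiled as part of this file): after selecting PCREDUNDANT, the inner PC
   can be retrieved with PCRedundantGetPC() and configured directly instead of through the
   -redundant_ options prefix.  The KSP object ksp is assumed to exist already.

     PC pc,innerpc;
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCREDUNDANT);CHKERRQ(ierr);
     ierr = PCRedundantGetPC(pc,&innerpc);CHKERRQ(ierr);
     ierr = PCSetType(innerpc,PCLU);CHKERRQ(ierr);
*/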
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCRedundantGetOperators_Redundant"
PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat)
{
  PC_Redundant *red = (PC_Redundant*)pc->data;

  PetscFunctionBegin;
  if (mat)  *mat  = red->pmats;
  if (pmat) *pmat = red->pmats;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "PCRedundantGetOperators"
/*@
   PCRedundantGetOperators - gets the matrix and preconditioner matrix used by the redundant solve

   Not Collective

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  mat - the matrix
-  pmat - the (possibly different) preconditioner matrix

   Level: advanced

.keywords: PC, redundant solve
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat)
{
  PetscErrorCode ierr,(*f)(PC,Mat*,Mat*);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  if (mat)  PetscValidPointer(mat,2);
  if (pmat) PetscValidPointer(pmat,3);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------*/
/*MC
     PCREDUNDANT - Runs a preconditioner for the entire problem on subgroups of processors

     Options for the inner (redundant) preconditioner can be set with the -redundant_ prefix,
     e.g. -redundant_pc_type lu

   Options Database:
.  -pc_redundant_number_comm <n> - number of subcommunicators to use

   Level: intermediate

.seealso: PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(),
          PCRedundantGetPC(), PCRedundantGetOperators()
M*/
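/*
   Example usage (a sketch; the assembled matrix A and the vectors b and x are assumed to exist):

     ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCREDUNDANT);CHKERRQ(ierr);
     ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);

   On the command line, for example,
     -pc_redundant_number_comm 2 -redundant_pc_type lu
   performs two redundant LU factorizations, each on (roughly) half of the processes.
*/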
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_Redundant"
PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc)
{
  PetscErrorCode ierr;
  PC_Redundant   *red;
  const char     *prefix;
  PetscInt       nsubcomm,np_subcomm,nleftover,i,j,color;
  PetscMPIInt    rank,size,subrank,*subsize,duprank;
  MPI_Comm       subcomm=0,dupcomm=0;

  PetscFunctionBegin;
  ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr);
  red->useparallelmat = PETSC_TRUE;

  ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
  nsubcomm = size;
  ierr = PetscOptionsGetInt(PETSC_NULL,"-pc_redundant_number_comm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr);
  if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Number of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size);

  /*--------------------------------------------------------------------------------------------------
    To avoid scattering data back from a subcomm to the original comm, we create each subcomm by
    iteratively assigning one process at a time to a subcomm.
    An example: size=4, nsubcomm=3
     pc->comm:
      rank:    [0] [1] [2] [3]
      color:    0   1   2   0

     subcomm:
      subrank: [0] [0] [0] [1]

     dupcomm:
      duprank: [0] [2] [3] [1]

    Here, subcomm[color = 0] has subsize=2 and owns processes [0] and [3],
          subcomm[color = 1] has subsize=1 and owns process  [1],
          subcomm[color = 2] has subsize=1 and owns process  [2].
    dupcomm has the same number of processes as pc->comm, and its duprank maps
    the processes of the subcomms contiguously into a 1d array:
      duprank:  [0] [1]       [2]         [3]
      rank:     [0] [3]       [1]         [2]
               subcomm[0]  subcomm[1]  subcomm[2]
   ----------------------------------------------------------------------------------------*/

  /* get the size of each subcommunicator */
  ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr);
  np_subcomm = size/nsubcomm;
  nleftover  = size - nsubcomm*np_subcomm;
  for (i=0; i<nsubcomm; i++){
    subsize[i] = np_subcomm;
    if (i<nleftover) subsize[i]++;
  }

  /* find the color of this process */
  color   = rank%nsubcomm;
  subrank = rank/nsubcomm;

  ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr);
  red->subcomm  = subcomm;
  red->color    = color;
  red->nsubcomm = nsubcomm;

  j = 0; duprank = 0;
  for (i=0; i<nsubcomm; i++){
    if (j == color){
      duprank += subrank;
      break;
    }
    duprank += subsize[i]; j++;
  }

  /* create dupcomm with the same size as comm, but whose rank, duprank, maps the subcomms contiguously into dupcomm */
  ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr);
  red->dupcomm = dupcomm;
  ierr = PetscFree(subsize);CHKERRQ(ierr);

  /* create the PC that each subcommunicator applies to its own copy of the problem */
  ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr);
  ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr);
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr);
  ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr);

  pc->ops->apply             = PCApply_Redundant;
  pc->ops->applytranspose    = 0;
  pc->ops->setup             = PCSetUp_Redundant;
  pc->ops->destroy           = PCDestroy_Redundant;
  pc->ops->setfromoptions    = PCSetFromOptions_Redundant;
  pc->ops->setuponblocks     = 0;
  pc->ops->view              = PCView_Redundant;
  pc->ops->applyrichardson   = 0;

  pc->data = (void*)red;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant",
                    PCRedundantSetScatter_Redundant);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant",
                    PCRedundantGetPC_Redundant);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant",
                    PCRedundantGetOperators_Redundant);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END