1 #define PETSCKSP_DLL 2 3 /* 4 This file defines a "solve the problem redundantly on each subgroup of processor" preconditioner. 5 */ 6 #include "private/pcimpl.h" /*I "petscpc.h" I*/ 7 #include "petscksp.h" 8 9 typedef struct { 10 PC pc; /* actual preconditioner used on each processor */ 11 Vec xsub,ysub; /* vectors of a subcommunicator to hold parallel vectors of pc->comm */ 12 Vec xdup,ydup; /* parallel vector that congregates xsub or ysub facilitating vector scattering */ 13 Mat pmats; /* matrix and optional preconditioner matrix belong to a subcommunicator */ 14 VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */ 15 PetscTruth useparallelmat; 16 MPI_Comm subcomm; /* processors belong to a subcommunicator implement a PC in parallel */ 17 MPI_Comm dupcomm; /* processors belong to pc->comm with their rank remapped in the way 18 that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */ 19 PetscInt nsubcomm; /* num of subcommunicators, which equals the num of redundant matrix systems */ 20 PetscInt color; /* color of processors in a subcommunicator */ 21 } PC_Redundant; 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "PCView_Redundant" 25 static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer) 26 { 27 PC_Redundant *red = (PC_Redundant*)pc->data; 28 PetscErrorCode ierr; 29 PetscMPIInt rank; 30 PetscTruth iascii,isstring; 31 PetscViewer sviewer; 32 33 PetscFunctionBegin; 34 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 35 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 36 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr); 37 if (iascii) { 38 ierr = PetscViewerASCIIPrintf(viewer," Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr); 39 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 40 if (!rank) { 41 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 42 ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 43 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 44 } 45 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 46 } else if (isstring) { 47 ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr); 48 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 49 if (!rank) { 50 ierr = PCView(red->pc,sviewer);CHKERRQ(ierr); 51 } 52 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 53 } else { 54 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name); 55 } 56 PetscFunctionReturn(0); 57 } 58 59 #include "src/mat/matimpl.h" /*I "petscmat.h" I*/ 60 #include "private/vecimpl.h" 61 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 62 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 63 64 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 65 PetscInt nzlocal,nsends,nrecvs; 66 PetscInt *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j; 67 PetscScalar *sbuf_a,**rbuf_a; 68 PetscErrorCode (*MatDestroy)(Mat); 69 } Mat_Redundant; 70 71 #undef __FUNCT__ 72 #define __FUNCT__ "PetscObjectContainerDestroy_MatRedundant" 73 PetscErrorCode PetscObjectContainerDestroy_MatRedundant(void *ptr) 74 { 75 PetscErrorCode ierr; 76 Mat_Redundant *redund=(Mat_Redundant*)ptr; 77 PetscInt i; 78 79 PetscFunctionBegin; 80 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 81 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 82 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 83 for (i=0; i<redund->nrecvs; i++){ 84 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 85 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 86 } 87 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 88 ierr = PetscFree(redund);CHKERRQ(ierr); 89 PetscFunctionReturn(0); 90 } 91 92 #undef __FUNCT__ 93 #define __FUNCT__ "MatDestroy_MatRedundant" 94 PetscErrorCode MatDestroy_MatRedundant(Mat A) 95 { 96 PetscErrorCode ierr; 97 PetscObjectContainer container; 98 Mat_Redundant *redund=PETSC_NULL; 99 100 PetscFunctionBegin; 101 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 102 if (container) { 103 ierr = PetscObjectContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 104 } else { 105 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 106 } 107 A->ops->destroy = redund->MatDestroy; 108 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 109 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 110 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 111 PetscFunctionReturn(0); 112 } 113 114 #undef __FUNCT__ 115 #define __FUNCT__ "MatGetRedundantMatrix" 116 PetscErrorCode MatGetRedundantMatrix_AIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 117 { 118 PetscMPIInt rank,size; 119 MPI_Comm comm=mat->comm; 120 PetscErrorCode ierr; 121 PetscInt nsends,nrecvs,i,prid=100,rownz_max; 122 PetscMPIInt *send_rank,*recv_rank; 123 PetscInt *rowrange=mat->rmap.range; 124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 125 Mat A=aij->A,B=aij->B,C=*matredundant; 126 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 127 PetscScalar *sbuf_a; 128 PetscInt nzlocal=a->nz+b->nz; 129 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 130 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 131 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 132 PetscScalar *vals,*aworkA,*aworkB; 133 PetscMPIInt tag1,tag2,tag3,imdex; 134 MPI_Request *s_waits1,*s_waits2,*s_waits3,*r_waits1,*r_waits2,*r_waits3; 135 MPI_Status recv_status,*send_status; 136 PetscInt *sbuf_nz,*rbuf_nz,count; 137 PetscInt **rbuf_j; 138 PetscScalar **rbuf_a; 139 Mat_Redundant *redund=PETSC_NULL; 140 PetscObjectContainer container; 141 142 PetscFunctionBegin; 143 ierr = PetscOptionsGetInt(PETSC_NULL,"-prid",&prid,PETSC_NULL);CHKERRQ(ierr); 144 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 145 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 146 147 if (reuse == MAT_REUSE_MATRIX) { 148 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 149 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 150 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 151 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 152 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 153 if (container) { 154 ierr = PetscObjectContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 155 } else { 156 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 157 } 158 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 159 160 nsends = redund->nsends; 161 nrecvs = redund->nrecvs; 162 send_rank = redund->send_rank; recv_rank = send_rank + size; 163 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 164 sbuf_j = redund->sbuf_j; 165 sbuf_a = redund->sbuf_a; 166 rbuf_j = redund->rbuf_j; 167 rbuf_a = redund->rbuf_a; 168 } 169 170 if (reuse == MAT_INITIAL_MATRIX){ 171 PetscMPIInt subrank,subsize; 172 PetscInt nleftover,np_subcomm; 173 /* get the destination processors' id send_rank, nsends and nrecvs */ 174 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 175 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 176 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 177 recv_rank = send_rank + size; 178 np_subcomm = size/nsubcomm; 179 nleftover = size - nsubcomm*np_subcomm; 180 nsends = 0; nrecvs = 0; 181 for (i=0; i<size; i++){ /* i=rank*/ 182 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 183 send_rank[nsends] = i; nsends++; 184 recv_rank[nrecvs++] = i; 185 } 186 } 187 if (rank >= size - nleftover){/* this proc is a leftover processor */ 188 i = size-nleftover-1; 189 j = 0; 190 while (j < nsubcomm - nleftover){ 191 send_rank[nsends++] = i; 192 i--; j++; 193 } 194 } 195 196 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 197 for (i=0; i<nleftover; i++){ 198 recv_rank[nrecvs++] = size-nleftover+i; 199 } 200 } 201 202 /* allocate sbuf_j, sbuf_a */ 203 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 204 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 205 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 206 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 207 208 /* copy mat's local entries into the buffers */ 209 if (reuse == MAT_INITIAL_MATRIX){ 210 rownz_max = 0; 211 rptr = sbuf_j; 212 cols = sbuf_j + rend-rstart + 1; 213 vals = sbuf_a; 214 rptr[0] = 0; 215 for (i=0; i<rend-rstart; i++){ 216 row = i + rstart; 217 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 218 ncols = nzA + nzB; 219 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 220 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 221 /* load the column indices for this row into cols */ 222 lwrite = 0; 223 for (l=0; l<nzB; l++) { 224 if ((ctmp = bmap[cworkB[l]]) < cstart){ 225 vals[lwrite] = aworkB[l]; 226 cols[lwrite++] = ctmp; 227 } 228 } 229 for (l=0; l<nzA; l++){ 230 vals[lwrite] = aworkA[l]; 231 cols[lwrite++] = cstart + cworkA[l]; 232 } 233 for (l=0; l<nzB; l++) { 234 if ((ctmp = bmap[cworkB[l]]) >= cend){ 235 vals[lwrite] = aworkB[l]; 236 cols[lwrite++] = ctmp; 237 } 238 } 239 vals += ncols; 240 cols += ncols; 241 rptr[i+1] = rptr[i] + ncols; 242 if (rownz_max < ncols) rownz_max = ncols; 243 } 244 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 245 } else { /* only copy matrix values into sbuf_a */ 246 rptr = sbuf_j; 247 vals = sbuf_a; 248 rptr[0] = 0; 249 for (i=0; i<rend-rstart; i++){ 250 row = i + rstart; 251 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 252 ncols = nzA + nzB; 253 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 254 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 255 lwrite = 0; 256 for (l=0; l<nzB; l++) { 257 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 258 } 259 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 260 for (l=0; l<nzB; l++) { 261 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 262 } 263 vals += ncols; 264 rptr[i+1] = rptr[i] + ncols; 265 } 266 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 267 268 /* send nzlocal to others, and recv other's nzlocal */ 269 /*--------------------------------------------------*/ 270 if (reuse == MAT_INITIAL_MATRIX){ 271 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 272 s_waits2 = s_waits3 + nsends; 273 s_waits1 = s_waits2 + nsends; 274 r_waits1 = s_waits1 + nsends; 275 r_waits2 = r_waits1 + nrecvs; 276 r_waits3 = r_waits2 + nrecvs; 277 } else { 278 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 279 r_waits3 = s_waits3 + nsends; 280 } 281 282 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 283 if (reuse == MAT_INITIAL_MATRIX){ 284 /* get new tags to keep the communication clean */ 285 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 286 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 287 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 288 rbuf_nz = sbuf_nz + nsends; 289 290 /* post receives of other's nzlocal */ 291 for (i=0; i<nrecvs; i++){ 292 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 293 } 294 /* send nzlocal to others */ 295 for (i=0; i<nsends; i++){ 296 sbuf_nz[i] = nzlocal; 297 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 298 } 299 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 300 count = nrecvs; 301 while (count) { 302 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 303 recv_rank[imdex] = recv_status.MPI_SOURCE; 304 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 305 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 306 307 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 308 rbuf_nz[imdex] += i + 2; 309 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 310 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 311 count--; 312 } 313 /* wait on sends of nzlocal */ 314 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 315 /* send mat->i,j to others, and recv from other's */ 316 /*------------------------------------------------*/ 317 for (i=0; i<nsends; i++){ 318 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 319 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 320 } 321 /* wait on receives of mat->i,j */ 322 /*------------------------------*/ 323 count = nrecvs; 324 while (count) { 325 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 326 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 327 count--; 328 } 329 /* wait on sends of mat->i,j */ 330 /*---------------------------*/ 331 if (nsends) { 332 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 333 } 334 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 335 336 /* post receives, send and receive mat->a */ 337 /*----------------------------------------*/ 338 for (imdex=0; imdex<nrecvs; imdex++) { 339 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 340 } 341 for (i=0; i<nsends; i++){ 342 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 343 } 344 count = nrecvs; 345 while (count) { 346 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 347 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 348 count--; 349 } 350 if (nsends) { 351 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 352 } 353 354 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 355 356 /* create redundant matrix */ 357 /*-------------------------*/ 358 if (reuse == MAT_INITIAL_MATRIX){ 359 /* compute rownz_max for preallocation */ 360 for (imdex=0; imdex<nrecvs; imdex++){ 361 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 362 rptr = rbuf_j[imdex]; 363 for (i=0; i<j; i++){ 364 ncols = rptr[i+1] - rptr[i]; 365 if (rownz_max < ncols) rownz_max = ncols; 366 } 367 } 368 369 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 370 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 371 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 372 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 373 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 374 } else { 375 C = *matredundant; 376 } 377 378 /* insert local matrix entries */ 379 rptr = sbuf_j; 380 cols = sbuf_j + rend-rstart + 1; 381 vals = sbuf_a; 382 for (i=0; i<rend-rstart; i++){ 383 row = i + rstart; 384 ncols = rptr[i+1] - rptr[i]; 385 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 386 vals += ncols; 387 cols += ncols; 388 } 389 /* insert received matrix entries */ 390 for (imdex=0; imdex<nrecvs; imdex++){ 391 rstart = rowrange[recv_rank[imdex]]; 392 rend = rowrange[recv_rank[imdex]+1]; 393 rptr = rbuf_j[imdex]; 394 cols = rbuf_j[imdex] + rend-rstart + 1; 395 vals = rbuf_a[imdex]; 396 for (i=0; i<rend-rstart; i++){ 397 row = i + rstart; 398 ncols = rptr[i+1] - rptr[i]; 399 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 400 vals += ncols; 401 cols += ncols; 402 } 403 } 404 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 405 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 406 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 407 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 408 if (reuse == MAT_INITIAL_MATRIX){ 409 PetscObjectContainer container; 410 *matredundant = C; 411 /* create a supporting struct and attach it to C for reuse */ 412 ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr); 413 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 414 ierr = PetscObjectContainerSetPointer(container,redund);CHKERRQ(ierr); 415 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 416 ierr = PetscObjectContainerSetUserDestroy(container,PetscObjectContainerDestroy_MatRedundant);CHKERRQ(ierr); 417 418 redund->nzlocal = nzlocal; 419 redund->nsends = nsends; 420 redund->nrecvs = nrecvs; 421 redund->send_rank = send_rank; 422 redund->sbuf_nz = sbuf_nz; 423 redund->sbuf_j = sbuf_j; 424 redund->sbuf_a = sbuf_a; 425 redund->rbuf_j = rbuf_j; 426 redund->rbuf_a = rbuf_a; 427 428 redund->MatDestroy = C->ops->destroy; 429 C->ops->destroy = MatDestroy_MatRedundant; 430 } 431 PetscFunctionReturn(0); 432 } 433 434 #undef __FUNCT__ 435 #define __FUNCT__ "PCSetUp_Redundant" 436 static PetscErrorCode PCSetUp_Redundant(PC pc) 437 { 438 PC_Redundant *red = (PC_Redundant*)pc->data; 439 PetscErrorCode ierr; 440 PetscInt mstart,mend,mlocal,m; 441 PetscMPIInt size; 442 MatReuse reuse = MAT_INITIAL_MATRIX; 443 MatStructure str = DIFFERENT_NONZERO_PATTERN; 444 MPI_Comm comm; 445 Vec vec; 446 447 PetscInt mlocal_sub; 448 PetscMPIInt subsize,subrank; 449 PetscInt rstart_sub,rend_sub,mloc_sub; 450 451 PetscFunctionBegin; 452 ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr); 453 ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr); 454 ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 455 ierr = VecGetSize(vec,&m);CHKERRQ(ierr); 456 if (!pc->setupcalled) { 457 /* create working vectors xsub/ysub and xdup/ydup */ 458 ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr); 459 ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr); 460 461 /* get local size of xsub/ysub */ 462 ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr); 463 ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr); 464 rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */ 465 if (subrank+1 < subsize){ 466 rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)]; 467 } else { 468 rend_sub = m; 469 } 470 mloc_sub = rend_sub - rstart_sub; 471 ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr); 472 /* create xsub with empty local arrays, because xdup's arrays will be placed into it */ 473 ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr); 474 475 /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it. 476 Note: we use communicator dupcomm, not pc->comm! */ 477 ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr); 478 ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr); 479 480 /* create vec scatters */ 481 if (!red->scatterin){ 482 IS is1,is2; 483 PetscInt *idx1,*idx2,i,j,k; 484 ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr); 485 idx2 = idx1 + red->nsubcomm*mlocal; 486 j = 0; 487 for (k=0; k<red->nsubcomm; k++){ 488 for (i=mstart; i<mend; i++){ 489 idx1[j] = i; 490 idx2[j++] = i + m*k; 491 } 492 } 493 ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr); 494 ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr); 495 ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr); 496 ierr = ISDestroy(is1);CHKERRQ(ierr); 497 ierr = ISDestroy(is2);CHKERRQ(ierr); 498 499 ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr); 500 ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr); 501 ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr); 502 ierr = ISDestroy(is1);CHKERRQ(ierr); 503 ierr = ISDestroy(is2);CHKERRQ(ierr); 504 ierr = PetscFree(idx1);CHKERRQ(ierr); 505 } 506 } 507 ierr = VecDestroy(vec);CHKERRQ(ierr); 508 509 /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */ 510 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 511 if (size == 1) { 512 red->useparallelmat = PETSC_FALSE; 513 } 514 515 if (red->useparallelmat) { 516 if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) { 517 /* destroy old matrices */ 518 if (red->pmats) { 519 //ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr); 520 ierr = MatDestroy(red->pmats);CHKERRQ(ierr); 521 } 522 } else if (pc->setupcalled == 1) { 523 reuse = MAT_REUSE_MATRIX; 524 str = SAME_NONZERO_PATTERN; 525 } 526 527 /* grab the parallel matrix and put it into processors of a subcomminicator */ 528 /*--------------------------------------------------------------------------*/ 529 ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr); 530 ierr = MatGetRedundantMatrix_AIJ(pc->pmat,red->nsubcomm,red->subcomm,mlocal_sub,reuse,&red->pmats);CHKERRQ(ierr); 531 /* tell PC of the subcommunicator its operators */ 532 ierr = PCSetOperators(red->pc,red->pmats,red->pmats,str);CHKERRQ(ierr); 533 } else { 534 ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr); 535 } 536 ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr); 537 ierr = PCSetUp(red->pc);CHKERRQ(ierr); 538 PetscFunctionReturn(0); 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "PCApply_Redundant" 543 static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y) 544 { 545 PC_Redundant *red = (PC_Redundant*)pc->data; 546 PetscErrorCode ierr; 547 PetscScalar *array; 548 549 PetscFunctionBegin; 550 /* scatter x to xdup */ 551 ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 552 ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr); 553 554 /* place xdup's local array into xsub */ 555 ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr); 556 ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr); 557 558 /* apply preconditioner on each processor */ 559 ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr); 560 ierr = VecResetArray(red->xsub);CHKERRQ(ierr); 561 ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr); 562 563 /* place ysub's local array into ydup */ 564 ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr); 565 ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr); 566 567 /* scatter ydup to y */ 568 ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 569 ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr); 570 ierr = VecResetArray(red->ydup);CHKERRQ(ierr); 571 ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "PCDestroy_Redundant" 577 static PetscErrorCode PCDestroy_Redundant(PC pc) 578 { 579 PC_Redundant *red = (PC_Redundant*)pc->data; 580 PetscErrorCode ierr; 581 582 PetscFunctionBegin; 583 if (red->scatterin) {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);} 584 if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);} 585 if (red->ysub) {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);} 586 if (red->xsub) {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);} 587 if (red->xdup) {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);} 588 if (red->ydup) {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);} 589 if (red->pmats) { 590 ierr = MatDestroy(red->pmats);CHKERRQ(ierr); 591 } 592 593 594 ierr = PCDestroy(red->pc);CHKERRQ(ierr); 595 ierr = PetscFree(red);CHKERRQ(ierr); 596 PetscFunctionReturn(0); 597 } 598 599 #undef __FUNCT__ 600 #define __FUNCT__ "PCSetFromOptions_Redundant" 601 static PetscErrorCode PCSetFromOptions_Redundant(PC pc) 602 { 603 PetscFunctionBegin; 604 PetscFunctionReturn(0); 605 } 606 607 EXTERN_C_BEGIN 608 #undef __FUNCT__ 609 #define __FUNCT__ "PCRedundantSetScatter_Redundant" 610 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out) 611 { 612 PC_Redundant *red = (PC_Redundant*)pc->data; 613 PetscErrorCode ierr; 614 615 PetscFunctionBegin; 616 red->scatterin = in; 617 red->scatterout = out; 618 ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr); 619 ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr); 620 PetscFunctionReturn(0); 621 } 622 EXTERN_C_END 623 624 #undef __FUNCT__ 625 #define __FUNCT__ "PCRedundantSetScatter" 626 /*@ 627 PCRedundantSetScatter - Sets the scatter used to copy values into the 628 redundant local solve and the scatter to move them back into the global 629 vector. 630 631 Collective on PC 632 633 Input Parameters: 634 + pc - the preconditioner context 635 . in - the scatter to move the values in 636 - out - the scatter to move them out 637 638 Level: advanced 639 640 .keywords: PC, redundant solve 641 @*/ 642 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out) 643 { 644 PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter); 645 646 PetscFunctionBegin; 647 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 648 PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2); 649 PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3); 650 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr); 651 if (f) { 652 ierr = (*f)(pc,in,out);CHKERRQ(ierr); 653 } 654 PetscFunctionReturn(0); 655 } 656 657 EXTERN_C_BEGIN 658 #undef __FUNCT__ 659 #define __FUNCT__ "PCRedundantGetPC_Redundant" 660 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc) 661 { 662 PC_Redundant *red = (PC_Redundant*)pc->data; 663 664 PetscFunctionBegin; 665 *innerpc = red->pc; 666 PetscFunctionReturn(0); 667 } 668 EXTERN_C_END 669 670 #undef __FUNCT__ 671 #define __FUNCT__ "PCRedundantGetPC" 672 /*@ 673 PCRedundantGetPC - Gets the sequential PC created by the redundant PC. 674 675 Not Collective 676 677 Input Parameter: 678 . pc - the preconditioner context 679 680 Output Parameter: 681 . innerpc - the sequential PC 682 683 Level: advanced 684 685 .keywords: PC, redundant solve 686 @*/ 687 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc) 688 { 689 PetscErrorCode ierr,(*f)(PC,PC*); 690 691 PetscFunctionBegin; 692 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 693 PetscValidPointer(innerpc,2); 694 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr); 695 if (f) { 696 ierr = (*f)(pc,innerpc);CHKERRQ(ierr); 697 } 698 PetscFunctionReturn(0); 699 } 700 701 EXTERN_C_BEGIN 702 #undef __FUNCT__ 703 #define __FUNCT__ "PCRedundantGetOperators_Redundant" 704 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat) 705 { 706 PC_Redundant *red = (PC_Redundant*)pc->data; 707 708 PetscFunctionBegin; 709 if (mat) *mat = red->pmats; 710 if (pmat) *pmat = red->pmats; 711 PetscFunctionReturn(0); 712 } 713 EXTERN_C_END 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "PCRedundantGetOperators" 717 /*@ 718 PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix 719 720 Not Collective 721 722 Input Parameter: 723 . pc - the preconditioner context 724 725 Output Parameters: 726 + mat - the matrix 727 - pmat - the (possibly different) preconditioner matrix 728 729 Level: advanced 730 731 .keywords: PC, redundant solve 732 @*/ 733 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat) 734 { 735 PetscErrorCode ierr,(*f)(PC,Mat*,Mat*); 736 737 PetscFunctionBegin; 738 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 739 if (mat) PetscValidPointer(mat,2); 740 if (pmat) PetscValidPointer(pmat,3); 741 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr); 742 if (f) { 743 ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr); 744 } 745 PetscFunctionReturn(0); 746 } 747 748 /* -------------------------------------------------------------------------------------*/ 749 /*MC 750 PCREDUNDANT - Runs a preconditioner for the entire problem on each processor 751 752 753 Options for the redundant preconditioners can be set with -redundant_pc_xxx 754 755 Level: intermediate 756 757 758 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(), 759 PCRedundantGetPC(), PCRedundantGetOperators() 760 761 M*/ 762 763 EXTERN_C_BEGIN 764 #undef __FUNCT__ 765 #define __FUNCT__ "PCCreate_Redundant" 766 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc) 767 { 768 PetscErrorCode ierr; 769 PC_Redundant *red; 770 const char *prefix; 771 772 PetscInt nsubcomm,np_subcomm,nleftover,i,j,color; 773 PetscMPIInt rank,size,subrank,*subsize; 774 MPI_Comm subcomm; 775 PetscMPIInt duprank; 776 PetscMPIInt rank_dup,size_dup; 777 MPI_Comm dupcomm; 778 779 PetscFunctionBegin; 780 ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr); 781 ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr); 782 red->useparallelmat = PETSC_TRUE; 783 784 ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr); 785 ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr); 786 nsubcomm = size; 787 ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr); 788 if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size); 789 790 /* get size of each subcommunicators */ 791 ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr); 792 np_subcomm = size/nsubcomm; 793 nleftover = size - nsubcomm*np_subcomm; 794 for (i=0; i<nsubcomm; i++){ 795 subsize[i] = np_subcomm; 796 if (i<nleftover) subsize[i]++; 797 } 798 799 /* find color for this proc */ 800 color = rank%nsubcomm; 801 subrank = rank/nsubcomm; 802 803 ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr); 804 red->subcomm = subcomm; 805 red->color = color; 806 red->nsubcomm = nsubcomm; 807 808 j = 0; duprank = 0; 809 for (i=0; i<nsubcomm; i++){ 810 if (j == color){ 811 duprank += subrank; 812 break; 813 } 814 duprank += subsize[i]; j++; 815 } 816 /* 817 ierr = PetscSynchronizedPrintf(pc->comm, "[%d] color %d, subrank %d, duprank %d\n",rank,color,subrank,duprank); 818 ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr); 819 */ 820 821 ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr); 822 ierr = MPI_Comm_rank(dupcomm,&rank_dup);CHKERRQ(ierr); 823 ierr = MPI_Comm_size(dupcomm,&size_dup);CHKERRQ(ierr); 824 /* 825 ierr = PetscSynchronizedPrintf(pc->comm, "[%d] duprank %d\n",rank,duprank); 826 ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr); 827 */ 828 red->dupcomm = dupcomm; 829 ierr = PetscFree(subsize);CHKERRQ(ierr); 830 831 /* create the sequential PC that each processor has copy of */ 832 ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr); 833 ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr); 834 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 835 ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr); 836 ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr); 837 838 pc->ops->apply = PCApply_Redundant; 839 pc->ops->applytranspose = 0; 840 pc->ops->setup = PCSetUp_Redundant; 841 pc->ops->destroy = PCDestroy_Redundant; 842 pc->ops->setfromoptions = PCSetFromOptions_Redundant; 843 pc->ops->setuponblocks = 0; 844 pc->ops->view = PCView_Redundant; 845 pc->ops->applyrichardson = 0; 846 847 pc->data = (void*)red; 848 849 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant", 850 PCRedundantSetScatter_Redundant);CHKERRQ(ierr); 851 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant", 852 PCRedundantGetPC_Redundant);CHKERRQ(ierr); 853 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant", 854 PCRedundantGetOperators_Redundant);CHKERRQ(ierr); 855 856 PetscFunctionReturn(0); 857 } 858 EXTERN_C_END 859