1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 51 PetscInt subset_size; 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 } 56 57 /* min/max and threshold */ 58 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 59 nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1; 60 nmax = PetscMax(nmin,nmax); 61 if (pcbddc->adaptive_threshold > 1.0) { 62 thresh = 1.0/pcbddc->adaptive_threshold; 63 } else { 64 thresh = 1.0; 65 } 66 67 /* allocate lapack workspace */ 68 cum = cum2 = 0; 69 maxneigs = 0; 70 for (i=0;i<sub_schurs->n_subs;i++) { 71 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 72 PetscInt n,subset_size; 73 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 74 n = PetscMin(subset_size,nmax); 75 cum += subset_size*n; 76 cum2 += n; 77 maxneigs = PetscMax(maxneigs,n); 78 } 79 } 80 81 if (mss) { 82 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 83 PetscBLASInt B_itype = 1; 84 PetscBLASInt B_N = mss; 85 PetscReal zero = 0.0; 86 PetscReal eps = 0.0; /* dlamch? */ 87 88 B_lwork = -1; 89 S = NULL; 90 St = NULL; 91 eigs = NULL; 92 eigv = NULL; 93 B_iwork = NULL; 94 B_ifail = NULL; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */ 112 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 116 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 117 #if defined(PETSC_USE_COMPLEX) 118 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 119 #endif 120 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 121 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 122 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 123 nv+cum,&pcbddc->adaptive_constraints_idxs, 124 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 125 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 126 127 maxneigs = 0; 128 cum = cum2 = cumarray = 0; 129 if (sub_schurs->is_Ej_com) { 130 const PetscInt *idxs; 131 132 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 133 for (cum=0;cum<nv;cum++) { 134 pcbddc->adaptive_constraints_n[cum] = 1; 135 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 136 pcbddc->adaptive_constraints_ptrs[cum] = cum; 137 pcbddc->adaptive_constraints_data[cum] = 1.0; 138 } 139 cum2 = cum; 140 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 141 } 142 143 if (mss) { /* multilevel */ 144 if (pcbddc->use_deluxe_scaling) { 145 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 146 } else { 147 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 148 } 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 150 } 151 152 for (i=0;i<sub_schurs->n_subs;i++) { 153 PetscInt j,subset_size; 154 155 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 156 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 157 const PetscInt *idxs; 158 PetscScalar one = 1.0,scalar_zero = 0.0; 159 PetscReal zero=0.0; 160 PetscBLASInt B_N; 161 162 /* S should be copied since we need it for deluxe scaling */ 163 if (sub_schurs->is_hermitian) { 164 PetscInt j; 165 for (j=0;j<subset_size;j++) { 166 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 167 } 168 for (j=0;j<subset_size;j++) { 169 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 170 } 171 } else { 172 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 173 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 174 } 175 /* is always this the right matrix? */ 176 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 177 178 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 179 /* St = Starray+cumarray; */ 180 181 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 182 PetscBLASInt B_itype = 1; 183 PetscBLASInt B_IL = 1, B_IU; 184 PetscReal eps = -1.0; /* dlamch? */ 185 PetscInt nmin_s; 186 187 /* ask for eigenvalues lower than thresh */ 188 if (pcbddc->dbg_flag) { 189 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d.\n",i,sub_schurs->n_subs); 190 } 191 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 192 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 193 #if defined(PETSC_USE_COMPLEX) 194 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 195 #else 196 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 197 #endif 198 ierr = PetscFPTrapPop();CHKERRQ(ierr); 199 if (B_ierr) { 200 if (B_ierr < 0 ) { 201 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 202 } else if (B_ierr <= B_N) { 203 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 204 } else { 205 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 206 } 207 } 208 209 if (B_neigs > nmax) { 210 if (pcbddc->dbg_flag) { 211 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 212 } 213 B_neigs = nmax; 214 } 215 216 nmin_s = PetscMin(nmin,B_N); 217 if (B_neigs < nmin_s) { 218 PetscBLASInt B_neigs2; 219 220 B_IL = B_neigs + 1; 221 ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr); 222 if (pcbddc->dbg_flag) { 223 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 224 } 225 if (sub_schurs->is_hermitian) { 226 PetscInt j; 227 for (j=0;j<subset_size;j++) { 228 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 229 } 230 for (j=0;j<subset_size;j++) { 231 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 232 } 233 } else { 234 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 235 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 236 } 237 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 238 #if defined(PETSC_USE_COMPLEX) 239 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 240 #else 241 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 242 #endif 243 ierr = PetscFPTrapPop();CHKERRQ(ierr); 244 B_neigs += B_neigs2; 245 } 246 if (B_ierr) { 247 if (B_ierr < 0 ) { 248 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 249 } else if (B_ierr <= B_N) { 250 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 251 } else { 252 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 253 } 254 } 255 if (pcbddc->dbg_flag) { 256 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 257 for (j=0;j<B_neigs;j++) { 258 if (eigs[j] == 0.0) { 259 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 260 } else { 261 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1.0/eigs[j]); 262 } 263 } 264 } 265 } else { 266 /* TODO */ 267 } 268 maxneigs = PetscMax(B_neigs,maxneigs); 269 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 270 271 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 272 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&scalar_zero,Seigv,&B_N)); 273 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 274 275 if (pcbddc->dbg_flag > 1) { 276 PetscInt ii; 277 for (ii=0;ii<B_neigs;ii++) { 278 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 279 for (j=0;j<B_N;j++) { 280 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e %1.4e\n",eigv[ii*B_N+j],Seigv[ii*B_N+j]); 281 } 282 } 283 } 284 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 285 for (j=0;j<B_neigs;j++) { 286 #if 0 287 { 288 PetscBLASInt Blas_N,Blas_one = 1.0; 289 PetscScalar norm; 290 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 291 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 292 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 293 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 294 } else { 295 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 296 } 297 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 298 } 299 #endif 300 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 301 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 302 cum2 += subset_size; 303 } 304 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 305 } 306 /* shift for next computation */ 307 cumarray += subset_size*subset_size; 308 } 309 if (pcbddc->dbg_flag) { 310 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 311 } 312 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 313 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 314 315 if (mss) { 316 if (pcbddc->use_deluxe_scaling) { 317 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 318 } else { 319 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 320 } 321 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 322 } 323 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 324 #if defined(PETSC_USE_COMPLEX) 325 ierr = PetscFree(rwork);CHKERRQ(ierr); 326 #endif 327 if (pcbddc->dbg_flag) { 328 PetscInt maxneigs_r; 329 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 330 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 331 } 332 PetscFunctionReturn(0); 333 } 334 335 #undef __FUNCT__ 336 #define __FUNCT__ "PCBDDCSetUpSolvers" 337 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 338 { 339 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 340 PetscScalar *coarse_submat_vals; 341 PetscErrorCode ierr; 342 343 PetscFunctionBegin; 344 /* Setup local scatters R_to_B and (optionally) R_to_D */ 345 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 346 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 347 348 /* Setup local neumann solver ksp_R */ 349 /* PCBDDCSetUpLocalScatters should be called first! */ 350 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 351 352 /* Change global null space passed in by the user if change of basis has been requested */ 353 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 354 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 355 } 356 357 /* 358 Setup local correction and local part of coarse basis. 359 Gives back the dense local part of the coarse matrix in column major ordering 360 */ 361 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 362 363 /* Compute total number of coarse nodes and setup coarse solver */ 364 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 365 366 /* free */ 367 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 368 PetscFunctionReturn(0); 369 } 370 371 #undef __FUNCT__ 372 #define __FUNCT__ "PCBDDCResetCustomization" 373 PetscErrorCode PCBDDCResetCustomization(PC pc) 374 { 375 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 376 PetscErrorCode ierr; 377 378 PetscFunctionBegin; 379 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 380 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 381 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 382 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 383 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 384 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 385 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 386 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 387 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 388 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 389 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 390 PetscFunctionReturn(0); 391 } 392 393 #undef __FUNCT__ 394 #define __FUNCT__ "PCBDDCResetTopography" 395 PetscErrorCode PCBDDCResetTopography(PC pc) 396 { 397 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 398 PetscErrorCode ierr; 399 400 PetscFunctionBegin; 401 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 402 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 403 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 404 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 405 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "PCBDDCResetSolvers" 411 PetscErrorCode PCBDDCResetSolvers(PC pc) 412 { 413 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 414 PetscScalar *array; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 419 if (pcbddc->coarse_phi_B) { 420 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 421 ierr = PetscFree(array);CHKERRQ(ierr); 422 } 423 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 424 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 425 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 426 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 427 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 428 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 429 if (pcbddc->local_auxmat2) { 430 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 431 ierr = PetscFree(array);CHKERRQ(ierr); 432 } 433 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 434 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 435 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 436 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 437 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 438 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 439 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 440 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 441 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 442 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 443 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 444 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 445 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 446 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 447 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 448 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 454 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 455 { 456 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 457 PC_IS *pcis = (PC_IS*)pc->data; 458 VecType impVecType; 459 PetscInt n_constraints,n_R,old_size; 460 PetscErrorCode ierr; 461 462 PetscFunctionBegin; 463 if (!pcbddc->ConstraintMatrix) { 464 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 465 } 466 /* get sizes */ 467 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 468 n_R = pcis->n-pcbddc->n_actual_vertices; 469 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 470 /* local work vectors (try to avoid unneeded work)*/ 471 /* R nodes */ 472 old_size = -1; 473 if (pcbddc->vec1_R) { 474 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 475 } 476 if (n_R != old_size) { 477 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 478 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 479 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 480 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 481 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 482 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 483 } 484 /* local primal dofs */ 485 old_size = -1; 486 if (pcbddc->vec1_P) { 487 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 488 } 489 if (pcbddc->local_primal_size != old_size) { 490 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 491 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 492 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 493 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 494 } 495 /* local explicit constraints */ 496 old_size = -1; 497 if (pcbddc->vec1_C) { 498 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 499 } 500 if (n_constraints && n_constraints != old_size) { 501 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 502 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 503 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 504 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 505 } 506 PetscFunctionReturn(0); 507 } 508 509 #undef __FUNCT__ 510 #define __FUNCT__ "PCBDDCSetUpCorrection" 511 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 512 { 513 PetscErrorCode ierr; 514 /* pointers to pcis and pcbddc */ 515 PC_IS* pcis = (PC_IS*)pc->data; 516 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 517 /* submatrices of local problem */ 518 Mat A_RV,A_VR,A_VV; 519 /* submatrices of local coarse problem */ 520 Mat S_VV,S_CV,S_VC,S_CC; 521 /* working matrices */ 522 Mat C_CR; 523 /* additional working stuff */ 524 PC pc_R; 525 Mat F; 526 PetscBool isLU,isCHOL,isILU; 527 528 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 529 PetscScalar *work; 530 PetscInt *idx_V_B; 531 PetscInt n,n_vertices,n_constraints; 532 PetscInt i,n_R,n_D,n_B; 533 PetscBool unsymmetric_check; 534 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 535 MatType impMatType; 536 /* some shortcuts to scalars */ 537 PetscScalar one=1.0,m_one=-1.0; 538 539 PetscFunctionBegin; 540 /* get number of vertices (corners plus constraints with change of basis) 541 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 542 n_vertices = pcbddc->n_actual_vertices; 543 n_constraints = pcbddc->local_primal_size-n_vertices; 544 /* Set Non-overlapping dimensions */ 545 n_B = pcis->n_B; n_D = pcis->n - n_B; 546 n_R = pcis->n-n_vertices; 547 548 /* Set types for local objects needed by BDDC precondtioner */ 549 impMatType = MATSEQDENSE; 550 551 /* vertices in boundary numbering */ 552 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 553 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 554 if (i != n_vertices) { 555 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 556 } 557 558 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 559 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 560 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 561 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 562 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 563 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 564 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 565 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 566 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 567 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 568 569 unsymmetric_check = PETSC_FALSE; 570 /* allocate workspace */ 571 n = 0; 572 if (n_constraints) { 573 n += n_R*n_constraints; 574 } 575 if (n_vertices) { 576 n = PetscMax(2*n_R*n_vertices,n); 577 } 578 if (!pcbddc->issym) { 579 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 580 unsymmetric_check = PETSC_TRUE; 581 } 582 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 583 584 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 585 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 586 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 587 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 588 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 589 if (isLU || isILU || isCHOL) { 590 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 591 } else { 592 F = NULL; 593 } 594 595 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 596 if (n_constraints) { 597 Mat M1,M2,M3; 598 IS is_aux; 599 /* see if we can save some allocations */ 600 if (pcbddc->local_auxmat2) { 601 PetscInt on_R,on_constraints; 602 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 603 if (on_R != n_R || on_constraints != n_constraints) { 604 PetscScalar *marray; 605 606 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 607 ierr = PetscFree(marray);CHKERRQ(ierr); 608 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 609 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 610 } 611 } 612 /* auxiliary matrices */ 613 if (!pcbddc->local_auxmat2) { 614 PetscScalar *marray; 615 616 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 617 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 618 marray += n_R*n_constraints; 619 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 620 } 621 622 /* Extract constraints on R nodes: C_{CR} */ 623 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 624 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 625 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 626 627 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 628 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 629 for (i=0;i<n_constraints;i++) { 630 const PetscScalar *row_cmat_values; 631 const PetscInt *row_cmat_indices; 632 PetscInt size_of_constraint,j; 633 634 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 635 for (j=0;j<size_of_constraint;j++) { 636 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 637 } 638 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 639 } 640 if (F) { 641 Mat B; 642 643 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 644 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 645 ierr = MatDestroy(&B);CHKERRQ(ierr); 646 } else { 647 PetscScalar *xarray; 648 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 649 for (i=0;i<n_constraints;i++) { 650 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 651 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 652 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 653 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 654 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 655 } 656 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 657 } 658 659 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 660 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 661 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 662 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 663 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 664 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 665 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 666 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 667 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 668 ierr = MatDestroy(&M2);CHKERRQ(ierr); 669 ierr = MatDestroy(&M3);CHKERRQ(ierr); 670 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 671 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 672 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 673 ierr = MatDestroy(&M1);CHKERRQ(ierr); 674 } 675 /* Get submatrices from subdomain matrix */ 676 if (n_vertices) { 677 Mat newmat; 678 IS is_aux; 679 PetscInt ibs,mbs; 680 PetscBool issbaij; 681 682 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 683 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 684 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 685 if (ibs != mbs) { /* need to convert to SEQAIJ */ 686 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 687 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 688 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 689 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 690 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 691 } else { 692 /* this is safe */ 693 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 694 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 695 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 696 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 697 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 698 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 699 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 700 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 701 } else { 702 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 703 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 704 } 705 } 706 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 707 } 708 709 /* Matrix of coarse basis functions (local) */ 710 if (pcbddc->coarse_phi_B) { 711 PetscInt on_B,on_primal,on_D=n_D; 712 if (pcbddc->coarse_phi_D) { 713 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 714 } 715 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 716 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 717 PetscScalar *marray; 718 719 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 720 ierr = PetscFree(marray);CHKERRQ(ierr); 721 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 722 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 723 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 724 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 725 } 726 } 727 728 if (!pcbddc->coarse_phi_B) { 729 PetscScalar *marray; 730 731 n = n_B*pcbddc->local_primal_size; 732 if (pcbddc->switch_static || pcbddc->dbg_flag) { 733 n += n_D*pcbddc->local_primal_size; 734 } 735 if (!pcbddc->issym) { 736 n *= 2; 737 } 738 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 739 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 740 n = n_B*pcbddc->local_primal_size; 741 if (pcbddc->switch_static || pcbddc->dbg_flag) { 742 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 743 n += n_D*pcbddc->local_primal_size; 744 } 745 if (!pcbddc->issym) { 746 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 747 if (pcbddc->switch_static || pcbddc->dbg_flag) { 748 n = n_B*pcbddc->local_primal_size; 749 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 750 } 751 } else { 752 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 753 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 754 if (pcbddc->switch_static || pcbddc->dbg_flag) { 755 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 756 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 757 } 758 } 759 } 760 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 761 /* vertices */ 762 if (n_vertices) { 763 764 if (n_R) { 765 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 766 PetscBLASInt B_N,B_one = 1; 767 PetscScalar *x,*y; 768 769 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 770 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 771 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 772 if (F) { 773 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 774 } else { 775 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 776 for (i=0;i<n_vertices;i++) { 777 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 778 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 779 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 780 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 781 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 782 } 783 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 784 } 785 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 786 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 787 if (n_constraints) { 788 Mat B; 789 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 790 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 791 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 792 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 793 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 794 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 795 ierr = MatDestroy(&B);CHKERRQ(ierr); 796 } 797 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 798 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 799 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 800 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 801 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 802 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 803 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 804 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 805 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 806 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 807 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 808 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 809 } else { 810 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 811 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 812 } 813 /* coarse basis functions */ 814 for (i=0;i<n_vertices;i++) { 815 PetscScalar *y; 816 817 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 818 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 819 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 820 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 821 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 822 y[n_B*i+idx_V_B[i]] = 1.0; 823 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 824 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 825 826 if (pcbddc->switch_static || pcbddc->dbg_flag) { 827 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 828 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 829 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 830 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 831 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 832 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 833 } 834 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 835 } 836 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 837 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 838 } 839 840 if (n_constraints) { 841 Mat B; 842 843 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 844 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 845 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 846 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 847 if (n_vertices) { 848 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 849 } 850 ierr = MatDestroy(&B);CHKERRQ(ierr); 851 /* coarse basis functions */ 852 for (i=0;i<n_constraints;i++) { 853 PetscScalar *y; 854 855 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 856 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 857 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 858 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 861 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 862 863 if (pcbddc->switch_static || pcbddc->dbg_flag) { 864 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 865 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 866 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 867 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 869 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 870 } 871 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 872 } 873 } 874 875 /* compute other basis functions for non-symmetric problems */ 876 if (!pcbddc->issym) { 877 Mat B,X; 878 879 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 880 881 if (n_constraints) { 882 Mat S_CCT,B_C; 883 884 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 885 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 886 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 887 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 888 if (n_vertices) { 889 Mat B_V,S_VCT; 890 891 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 892 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 893 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 894 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 895 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 896 } 897 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 898 } 899 if (n_vertices && n_R) { 900 Mat A_VRT; 901 PetscBLASInt B_N,B_one = 1; 902 903 if (!n_constraints) { /* if there are no constraints, reset work */ 904 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 905 } 906 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 907 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 908 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 909 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 910 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 911 } 912 913 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 914 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 915 for (i=0;i<pcbddc->local_primal_size;i++) { 916 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 917 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 918 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 919 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 920 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 921 } 922 } else { 923 for (i=0;i<pcbddc->local_primal_size;i++) { 924 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 925 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 926 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 927 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 928 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 929 } 930 } 931 ierr = MatDestroy(&B);CHKERRQ(ierr); 932 /* coarse basis functions */ 933 for (i=0;i<pcbddc->local_primal_size;i++) { 934 PetscScalar *y; 935 936 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 937 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 938 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 939 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 940 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 941 if (i<n_vertices) { 942 y[n_B*i+idx_V_B[i]] = 1.0; 943 } 944 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 945 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 946 947 if (pcbddc->switch_static || pcbddc->dbg_flag) { 948 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 949 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 950 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 951 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 952 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 953 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 954 } 955 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 956 } 957 ierr = MatDestroy(&X);CHKERRQ(ierr); 958 } 959 960 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 961 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 962 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 963 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 964 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 965 /* Checking coarse_sub_mat and coarse basis functios */ 966 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 967 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 968 if (pcbddc->dbg_flag) { 969 Mat coarse_sub_mat; 970 Mat AUXMAT,TM1,TM2,TM3,TM4; 971 Mat coarse_phi_D,coarse_phi_B; 972 Mat coarse_psi_D,coarse_psi_B; 973 Mat A_II,A_BB,A_IB,A_BI; 974 MatType checkmattype=MATSEQAIJ; 975 PetscReal real_value; 976 977 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 978 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 979 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 980 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 981 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 982 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 983 if (unsymmetric_check) { 984 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 985 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 986 } 987 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 988 989 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 990 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 991 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 992 if (unsymmetric_check) { 993 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 994 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 995 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 996 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 997 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 998 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 999 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1000 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1001 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1002 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1003 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1004 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1005 } else { 1006 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1007 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1008 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1009 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1010 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1011 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1012 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1013 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1014 } 1015 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1016 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1017 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1018 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1019 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1020 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 1021 1022 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1023 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1024 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1025 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1026 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1027 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1028 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1029 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1030 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1031 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1032 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1033 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1034 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1035 if (unsymmetric_check) { 1036 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1037 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1038 } 1039 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1040 } 1041 1042 /* free memory */ 1043 ierr = PetscFree(work);CHKERRQ(ierr); 1044 if (n_vertices) { 1045 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1046 } 1047 if (n_constraints) { 1048 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1049 } 1050 /* get back data */ 1051 *coarse_submat_vals_n = coarse_submat_vals; 1052 PetscFunctionReturn(0); 1053 } 1054 1055 #undef __FUNCT__ 1056 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1057 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1058 { 1059 Mat *work_mat; 1060 IS isrow_s,iscol_s; 1061 PetscBool rsorted,csorted; 1062 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1063 PetscErrorCode ierr; 1064 1065 PetscFunctionBegin; 1066 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1067 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1068 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1069 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1070 1071 if (!rsorted) { 1072 const PetscInt *idxs; 1073 PetscInt *idxs_sorted,i; 1074 1075 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1076 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1077 for (i=0;i<rsize;i++) { 1078 idxs_perm_r[i] = i; 1079 } 1080 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1081 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1082 for (i=0;i<rsize;i++) { 1083 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1084 } 1085 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1086 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1087 } else { 1088 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1089 isrow_s = isrow; 1090 } 1091 1092 if (!csorted) { 1093 if (isrow == iscol) { 1094 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1095 iscol_s = isrow_s; 1096 } else { 1097 const PetscInt *idxs; 1098 PetscInt *idxs_sorted,i; 1099 1100 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1101 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1102 for (i=0;i<csize;i++) { 1103 idxs_perm_c[i] = i; 1104 } 1105 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1106 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1107 for (i=0;i<csize;i++) { 1108 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1109 } 1110 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1111 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1112 } 1113 } else { 1114 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1115 iscol_s = iscol; 1116 } 1117 1118 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1119 1120 if (!rsorted || !csorted) { 1121 Mat new_mat; 1122 IS is_perm_r,is_perm_c; 1123 1124 if (!rsorted) { 1125 PetscInt *idxs_r,i; 1126 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1127 for (i=0;i<rsize;i++) { 1128 idxs_r[idxs_perm_r[i]] = i; 1129 } 1130 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1131 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1132 } else { 1133 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1134 } 1135 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1136 1137 if (!csorted) { 1138 if (isrow_s == iscol_s) { 1139 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1140 is_perm_c = is_perm_r; 1141 } else { 1142 PetscInt *idxs_c,i; 1143 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1144 for (i=0;i<csize;i++) { 1145 idxs_c[idxs_perm_c[i]] = i; 1146 } 1147 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1148 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1149 } 1150 } else { 1151 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1152 } 1153 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1154 1155 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1156 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1157 work_mat[0] = new_mat; 1158 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1159 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1160 } 1161 1162 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1163 *B = work_mat[0]; 1164 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1165 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1166 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1167 PetscFunctionReturn(0); 1168 } 1169 1170 #undef __FUNCT__ 1171 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1172 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1173 { 1174 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1175 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1176 Mat new_mat; 1177 IS is_local,is_global; 1178 PetscInt local_size; 1179 PetscBool isseqaij; 1180 PetscErrorCode ierr; 1181 1182 PetscFunctionBegin; 1183 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1184 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1185 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1186 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1187 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1188 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1189 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1190 1191 /* check */ 1192 if (pcbddc->dbg_flag) { 1193 Vec x,x_change; 1194 PetscReal error; 1195 1196 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1197 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1198 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1199 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1200 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1201 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1202 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1203 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1204 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1205 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1206 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1207 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1208 ierr = VecDestroy(&x);CHKERRQ(ierr); 1209 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1210 } 1211 1212 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1213 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1214 if (isseqaij) { 1215 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1216 } else { 1217 Mat work_mat; 1218 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1219 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1220 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1221 } 1222 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1223 #if !defined(PETSC_USE_COMPLEX) 1224 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr); 1225 #endif 1226 /* 1227 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1228 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1229 */ 1230 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1236 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1237 { 1238 PC_IS* pcis = (PC_IS*)(pc->data); 1239 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1240 IS is_aux1,is_aux2; 1241 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1242 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1243 PetscInt vbs,bs; 1244 PetscBT bitmask; 1245 PetscErrorCode ierr; 1246 1247 PetscFunctionBegin; 1248 /* 1249 No need to setup local scatters if 1250 - primal space is unchanged 1251 AND 1252 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1253 AND 1254 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1255 */ 1256 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1257 PetscFunctionReturn(0); 1258 } 1259 /* destroy old objects */ 1260 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1261 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1262 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1263 /* Set Non-overlapping dimensions */ 1264 n_B = pcis->n_B; n_D = pcis->n - n_B; 1265 n_vertices = pcbddc->n_actual_vertices; 1266 /* create auxiliary bitmask */ 1267 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1268 for (i=0;i<n_vertices;i++) { 1269 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1270 } 1271 1272 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1273 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1274 for (i=0, n_R=0; i<pcis->n; i++) { 1275 if (!PetscBTLookup(bitmask,i)) { 1276 idx_R_local[n_R] = i; 1277 n_R++; 1278 } 1279 } 1280 1281 /* Block code */ 1282 vbs = 1; 1283 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1284 if (bs>1 && !(n_vertices%bs)) { 1285 PetscBool is_blocked = PETSC_TRUE; 1286 PetscInt *vary; 1287 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1288 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1289 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1290 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1291 for (i=0; i<n_vertices/bs; i++) { 1292 if (vary[i]!=0 && vary[i]!=bs) { 1293 is_blocked = PETSC_FALSE; 1294 break; 1295 } 1296 } 1297 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1298 vbs = bs; 1299 for (i=0;i<n_R/vbs;i++) { 1300 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1301 } 1302 } 1303 ierr = PetscFree(vary);CHKERRQ(ierr); 1304 } 1305 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1306 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1307 1308 /* print some info if requested */ 1309 if (pcbddc->dbg_flag) { 1310 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1311 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1312 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1313 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1314 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1315 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1316 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1317 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1318 } 1319 1320 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1321 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1322 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1323 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1324 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1325 for (i=0; i<n_D; i++) { 1326 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1327 } 1328 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1329 for (i=0, j=0; i<n_R; i++) { 1330 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1331 aux_array1[j++] = i; 1332 } 1333 } 1334 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1335 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1336 for (i=0, j=0; i<n_B; i++) { 1337 if (!PetscBTLookup(bitmask,is_indices[i])) { 1338 aux_array2[j++] = i; 1339 } 1340 } 1341 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1342 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1343 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1344 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1345 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1346 1347 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1348 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1349 for (i=0, j=0; i<n_R; i++) { 1350 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1351 aux_array1[j++] = i; 1352 } 1353 } 1354 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1355 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1356 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1357 } 1358 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1359 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1360 PetscFunctionReturn(0); 1361 } 1362 1363 1364 #undef __FUNCT__ 1365 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1366 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1367 { 1368 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1369 PC_IS *pcis = (PC_IS*)pc->data; 1370 PC pc_temp; 1371 Mat A_RR; 1372 MatReuse reuse; 1373 PetscScalar m_one = -1.0; 1374 PetscReal value; 1375 PetscInt n_D,n_R,ibs,mbs; 1376 PetscBool use_exact,use_exact_reduced,issbaij; 1377 PetscErrorCode ierr; 1378 /* prefixes stuff */ 1379 char dir_prefix[256],neu_prefix[256],str_level[16]; 1380 size_t len; 1381 1382 PetscFunctionBegin; 1383 1384 /* compute prefixes */ 1385 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1386 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1387 if (!pcbddc->current_level) { 1388 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1389 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1390 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1391 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1392 } else { 1393 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1394 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1395 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1396 len -= 15; /* remove "pc_bddc_coarse_" */ 1397 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1398 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1399 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1400 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1401 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1402 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1403 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1404 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1405 } 1406 1407 /* DIRICHLET PROBLEM */ 1408 if (dirichlet) { 1409 if (pcbddc->issym) { 1410 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1411 } 1412 /* Matrix for Dirichlet problem is pcis->A_II */ 1413 n_D = pcis->n - pcis->n_B; 1414 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1415 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1416 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1417 /* default */ 1418 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1419 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1420 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1421 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1422 if (issbaij) { 1423 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1424 } else { 1425 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1426 } 1427 /* Allow user's customization */ 1428 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1429 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1430 } 1431 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1432 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1433 if (!n_D) { 1434 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1435 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1436 } 1437 /* Set Up KSP for Dirichlet problem of BDDC */ 1438 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1439 /* set ksp_D into pcis data */ 1440 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1441 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1442 pcis->ksp_D = pcbddc->ksp_D; 1443 } 1444 1445 /* NEUMANN PROBLEM */ 1446 A_RR = 0; 1447 if (neumann) { 1448 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1449 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1450 if (pcbddc->ksp_R) { /* already created ksp */ 1451 PetscInt nn_R; 1452 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1453 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1454 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1455 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1456 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1457 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1458 reuse = MAT_INITIAL_MATRIX; 1459 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1460 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1461 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1462 reuse = MAT_INITIAL_MATRIX; 1463 } else { /* safe to reuse the matrix */ 1464 reuse = MAT_REUSE_MATRIX; 1465 } 1466 } 1467 /* last check */ 1468 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1469 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1470 reuse = MAT_INITIAL_MATRIX; 1471 } 1472 } else { /* first time, so we need to create the matrix */ 1473 reuse = MAT_INITIAL_MATRIX; 1474 } 1475 /* extract A_RR */ 1476 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1477 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1478 if (ibs != mbs) { 1479 Mat newmat; 1480 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1481 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1482 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1483 } else { 1484 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1485 } 1486 if (pcbddc->issym) { 1487 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1488 } 1489 if (!pcbddc->ksp_R) { /* create object if not present */ 1490 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1491 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1492 /* default */ 1493 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1494 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1495 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1496 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1497 if (issbaij) { 1498 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1499 } else { 1500 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1501 } 1502 /* Allow user's customization */ 1503 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1504 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1505 } 1506 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1507 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1508 if (!n_R) { 1509 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1510 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1511 } 1512 /* Set Up KSP for Neumann problem of BDDC */ 1513 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1514 } 1515 1516 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1517 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1518 if (pcbddc->dbg_flag) { 1519 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1520 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1521 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1522 } 1523 if (dirichlet) { /* Dirichlet */ 1524 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1525 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1526 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1527 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1528 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1529 /* need to be adapted? */ 1530 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1531 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1532 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1533 /* print info */ 1534 if (pcbddc->dbg_flag) { 1535 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1536 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1537 } 1538 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1539 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1540 } 1541 } 1542 if (neumann) { /* Neumann */ 1543 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1544 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1545 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1546 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1547 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1548 /* need to be adapted? */ 1549 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1550 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1551 /* print info */ 1552 if (pcbddc->dbg_flag) { 1553 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1554 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1555 } 1556 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1557 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1558 } 1559 } 1560 } 1561 /* free Neumann problem's matrix */ 1562 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1563 PetscFunctionReturn(0); 1564 } 1565 1566 #undef __FUNCT__ 1567 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1568 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1569 { 1570 PetscErrorCode ierr; 1571 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1572 1573 PetscFunctionBegin; 1574 if (applytranspose) { 1575 if (pcbddc->local_auxmat1) { 1576 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1577 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1578 } 1579 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1580 } else { 1581 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1582 if (pcbddc->local_auxmat1) { 1583 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1584 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1585 } 1586 } 1587 PetscFunctionReturn(0); 1588 } 1589 1590 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1593 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1594 { 1595 PetscErrorCode ierr; 1596 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1597 PC_IS* pcis = (PC_IS*) (pc->data); 1598 const PetscScalar zero = 0.0; 1599 1600 PetscFunctionBegin; 1601 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1602 if (applytranspose) { 1603 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1604 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1605 } else { 1606 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1607 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1608 } 1609 /* start communications from local primal nodes to rhs of coarse solver */ 1610 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1611 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1612 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1613 1614 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1615 /* TODO remove null space when doing multilevel */ 1616 if (pcbddc->coarse_ksp) { 1617 Vec rhs,sol; 1618 1619 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1620 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1621 if (applytranspose) { 1622 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1623 } else { 1624 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1625 } 1626 } 1627 1628 /* Local solution on R nodes */ 1629 if (pcis->n) { 1630 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1631 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1632 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1633 if (pcbddc->switch_static) { 1634 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1635 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1636 } 1637 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1638 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1639 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1640 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 if (pcbddc->switch_static) { 1642 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1643 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1644 } 1645 } 1646 1647 /* communications from coarse sol to local primal nodes */ 1648 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1649 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1650 1651 /* Sum contributions from two levels */ 1652 if (applytranspose) { 1653 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1654 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1655 } else { 1656 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1657 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1658 } 1659 PetscFunctionReturn(0); 1660 } 1661 1662 #undef __FUNCT__ 1663 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1664 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1665 { 1666 PetscErrorCode ierr; 1667 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1668 PetscScalar *array; 1669 Vec from,to; 1670 1671 PetscFunctionBegin; 1672 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1673 from = pcbddc->coarse_vec; 1674 to = pcbddc->vec1_P; 1675 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1676 Vec tvec; 1677 1678 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1679 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1680 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1681 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1682 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1683 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1684 } 1685 } else { /* from local to global -> put data in coarse right hand side */ 1686 from = pcbddc->vec1_P; 1687 to = pcbddc->coarse_vec; 1688 } 1689 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1690 PetscFunctionReturn(0); 1691 } 1692 1693 #undef __FUNCT__ 1694 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1695 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1696 { 1697 PetscErrorCode ierr; 1698 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1699 PetscScalar *array; 1700 Vec from,to; 1701 1702 PetscFunctionBegin; 1703 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1704 from = pcbddc->coarse_vec; 1705 to = pcbddc->vec1_P; 1706 } else { /* from local to global -> put data in coarse right hand side */ 1707 from = pcbddc->vec1_P; 1708 to = pcbddc->coarse_vec; 1709 } 1710 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1711 if (smode == SCATTER_FORWARD) { 1712 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1713 Vec tvec; 1714 1715 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1716 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1717 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1718 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1719 } 1720 } else { 1721 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1722 ierr = VecResetArray(from);CHKERRQ(ierr); 1723 } 1724 } 1725 PetscFunctionReturn(0); 1726 } 1727 1728 /* uncomment for testing purposes */ 1729 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1732 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1733 { 1734 PetscErrorCode ierr; 1735 PC_IS* pcis = (PC_IS*)(pc->data); 1736 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1737 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1738 /* one and zero */ 1739 PetscScalar one=1.0,zero=0.0; 1740 /* space to store constraints and their local indices */ 1741 PetscScalar *temp_quadrature_constraint; 1742 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1743 /* iterators */ 1744 PetscInt i,j,k,total_counts,temp_start_ptr; 1745 /* BLAS integers */ 1746 PetscBLASInt lwork,lierr; 1747 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1748 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1749 /* reuse */ 1750 PetscInt olocal_primal_size; 1751 PetscInt *oprimal_indices_local_idxs; 1752 /* change of basis */ 1753 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1754 PetscBool boolforchange,qr_needed; 1755 PetscBT touched,change_basis,qr_needed_idx; 1756 /* auxiliary stuff */ 1757 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1758 PetscInt ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL; 1759 PetscScalar *temp_quadrature_constraint_work=NULL; 1760 /* some quantities */ 1761 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1762 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1763 1764 PetscFunctionBegin; 1765 /* Destroy Mat objects computed previously */ 1766 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1767 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1768 1769 /* print some info */ 1770 if (pcbddc->dbg_flag) { 1771 IS vertices; 1772 PetscInt nv,nedges,nfaces; 1773 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1774 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1775 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1776 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1777 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1778 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1779 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1780 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1781 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1782 } 1783 1784 if (!pcbddc->adaptive_selection) { 1785 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1786 MatNullSpace nearnullsp; 1787 const Vec *nearnullvecs; 1788 Vec *localnearnullsp; 1789 PetscScalar *array; 1790 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1791 PetscBool nnsp_has_cnst; 1792 /* LAPACK working arrays for SVD or POD */ 1793 PetscBool skip_lapack; 1794 PetscScalar *work; 1795 PetscReal *singular_vals; 1796 #if defined(PETSC_USE_COMPLEX) 1797 PetscReal *rwork; 1798 #endif 1799 #if defined(PETSC_MISSING_LAPACK_GESVD) 1800 PetscScalar *temp_basis,*correlation_mat; 1801 #else 1802 PetscBLASInt dummy_int=1; 1803 PetscScalar dummy_scalar=1.; 1804 #endif 1805 1806 /* Get index sets for faces, edges and vertices from graph */ 1807 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1808 /* free unneeded index sets */ 1809 if (!pcbddc->use_vertices) { 1810 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1811 } 1812 if (!pcbddc->use_edges) { 1813 for (i=0;i<n_ISForEdges;i++) { 1814 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1815 } 1816 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1817 n_ISForEdges = 0; 1818 } 1819 if (!pcbddc->use_faces) { 1820 for (i=0;i<n_ISForFaces;i++) { 1821 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1822 } 1823 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1824 n_ISForFaces = 0; 1825 } 1826 /* HACKS (the following two blocks of code) */ 1827 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1828 pcbddc->use_change_of_basis = PETSC_TRUE; 1829 if (!ISForEdges) { 1830 pcbddc->use_change_on_faces = PETSC_TRUE; 1831 } 1832 } 1833 if (pcbddc->NullSpace) { 1834 /* use_change_of_basis should be consistent among processors */ 1835 PetscBool tbool[2],gbool[2]; 1836 tbool [0] = pcbddc->use_change_of_basis; 1837 tbool [1] = pcbddc->use_change_on_faces; 1838 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1839 pcbddc->use_change_of_basis = gbool[0]; 1840 pcbddc->use_change_on_faces = gbool[1]; 1841 } 1842 1843 /* check if near null space is attached to global mat */ 1844 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1845 if (nearnullsp) { 1846 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1847 /* remove any stored info */ 1848 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1849 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1850 /* store information for BDDC solver reuse */ 1851 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1852 pcbddc->onearnullspace = nearnullsp; 1853 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1854 for (i=0;i<nnsp_size;i++) { 1855 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1856 } 1857 } else { /* if near null space is not provided BDDC uses constants by default */ 1858 nnsp_size = 0; 1859 nnsp_has_cnst = PETSC_TRUE; 1860 } 1861 /* get max number of constraints on a single cc */ 1862 max_constraints = nnsp_size; 1863 if (nnsp_has_cnst) max_constraints++; 1864 1865 /* 1866 Evaluate maximum storage size needed by the procedure 1867 - temp_indices will contain start index of each constraint stored as follows 1868 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1869 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1870 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1871 */ 1872 total_counts = n_ISForFaces+n_ISForEdges; 1873 total_counts *= max_constraints; 1874 n_vertices = 0; 1875 if (ISForVertices) { 1876 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1877 } 1878 total_counts += n_vertices; 1879 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1880 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1881 total_counts = 0; 1882 max_size_of_constraint = 0; 1883 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1884 if (i<n_ISForEdges) { 1885 used_IS = &ISForEdges[i]; 1886 } else { 1887 used_IS = &ISForFaces[i-n_ISForEdges]; 1888 } 1889 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1890 total_counts += j; 1891 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1892 } 1893 total_counts *= max_constraints; 1894 total_counts += n_vertices; 1895 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1896 /* get local part of global near null space vectors */ 1897 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1898 for (k=0;k<nnsp_size;k++) { 1899 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1900 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1901 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1902 } 1903 1904 /* whether or not to skip lapack calls */ 1905 skip_lapack = PETSC_TRUE; 1906 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1907 1908 /* allocate some auxiliary stuff */ 1909 if (!skip_lapack || pcbddc->use_qr_single) { 1910 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1911 } else { 1912 gidxs = NULL; 1913 permutation = NULL; 1914 temp_indices_to_constraint_work = NULL; 1915 temp_quadrature_constraint_work = NULL; 1916 } 1917 1918 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1919 if (!skip_lapack) { 1920 PetscScalar temp_work; 1921 1922 #if defined(PETSC_MISSING_LAPACK_GESVD) 1923 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1924 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1925 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1926 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1927 #if defined(PETSC_USE_COMPLEX) 1928 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1929 #endif 1930 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1931 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1932 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1933 lwork = -1; 1934 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1935 #if !defined(PETSC_USE_COMPLEX) 1936 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1937 #else 1938 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1939 #endif 1940 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1941 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1942 #else /* on missing GESVD */ 1943 /* SVD */ 1944 PetscInt max_n,min_n; 1945 max_n = max_size_of_constraint; 1946 min_n = max_constraints; 1947 if (max_size_of_constraint < max_constraints) { 1948 min_n = max_size_of_constraint; 1949 max_n = max_constraints; 1950 } 1951 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1952 #if defined(PETSC_USE_COMPLEX) 1953 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1954 #endif 1955 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1956 lwork = -1; 1957 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1958 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1959 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1960 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1961 #if !defined(PETSC_USE_COMPLEX) 1962 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 1963 #else 1964 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 1965 #endif 1966 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1967 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1968 #endif /* on missing GESVD */ 1969 /* Allocate optimal workspace */ 1970 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1971 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1972 } 1973 /* Now we can loop on constraining sets */ 1974 total_counts = 0; 1975 temp_indices[0] = 0; 1976 /* vertices */ 1977 if (ISForVertices) { 1978 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1979 if (nnsp_has_cnst) { /* consider all vertices */ 1980 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1981 for (i=0;i<n_vertices;i++) { 1982 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1983 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1984 total_counts++; 1985 } 1986 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1987 PetscBool used_vertex; 1988 for (i=0;i<n_vertices;i++) { 1989 used_vertex = PETSC_FALSE; 1990 k = 0; 1991 while (!used_vertex && k<nnsp_size) { 1992 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1993 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1994 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1995 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1996 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1997 total_counts++; 1998 used_vertex = PETSC_TRUE; 1999 } 2000 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2001 k++; 2002 } 2003 } 2004 } 2005 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2006 n_vertices = total_counts; 2007 } 2008 2009 /* edges and faces */ 2010 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2011 if (ncc<n_ISForEdges) { 2012 used_IS = &ISForEdges[ncc]; 2013 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2014 } else { 2015 used_IS = &ISForFaces[ncc-n_ISForEdges]; 2016 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2017 } 2018 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2019 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2020 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2021 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2022 /* change of basis should not be performed on local periodic nodes */ 2023 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2024 if (nnsp_has_cnst) { 2025 PetscScalar quad_value; 2026 temp_constraints++; 2027 if (!pcbddc->use_nnsp_true) { 2028 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2029 } else { 2030 quad_value = 1.0; 2031 } 2032 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2033 for (j=0;j<size_of_constraint;j++) { 2034 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 2035 } 2036 /* sort by global ordering if using lapack subroutines (not needed!) */ 2037 if (!skip_lapack || pcbddc->use_qr_single) { 2038 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2039 for (j=0;j<size_of_constraint;j++) { 2040 permutation[j]=j; 2041 } 2042 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2043 for (j=0;j<size_of_constraint;j++) { 2044 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 2045 } 2046 for (j=0;j<size_of_constraint;j++) { 2047 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2048 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2049 } 2050 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2051 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2052 } 2053 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2054 total_counts++; 2055 } 2056 for (k=0;k<nnsp_size;k++) { 2057 PetscReal real_value; 2058 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2059 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2060 for (j=0;j<size_of_constraint;j++) { 2061 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2062 } 2063 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2064 /* check if array is null on the connected component */ 2065 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2066 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2067 if (real_value > 0.0) { /* keep indices and values */ 2068 /* sort by global ordering if using lapack subroutines */ 2069 if (!skip_lapack || pcbddc->use_qr_single) { 2070 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2071 for (j=0;j<size_of_constraint;j++) { 2072 permutation[j]=j; 2073 } 2074 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2075 for (j=0;j<size_of_constraint;j++) { 2076 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2077 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2078 } 2079 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2080 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2081 } 2082 temp_constraints++; 2083 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2084 total_counts++; 2085 } 2086 } 2087 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2088 valid_constraints = temp_constraints; 2089 if (!pcbddc->use_nnsp_true && temp_constraints) { 2090 if (temp_constraints == 1) { /* just normalize the constraint */ 2091 PetscScalar norm; 2092 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2093 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2094 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2095 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2096 } else { /* perform SVD */ 2097 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2098 2099 #if defined(PETSC_MISSING_LAPACK_GESVD) 2100 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2101 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2102 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2103 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2104 from that computed using LAPACKgesvd 2105 -> This is due to a different computation of eigenvectors in LAPACKheev 2106 -> The quality of the POD-computed basis will be the same */ 2107 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2108 /* Store upper triangular part of correlation matrix */ 2109 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2110 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2111 for (j=0;j<temp_constraints;j++) { 2112 for (k=0;k<j+1;k++) { 2113 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one)); 2114 } 2115 } 2116 /* compute eigenvalues and eigenvectors of correlation matrix */ 2117 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2118 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2119 #if !defined(PETSC_USE_COMPLEX) 2120 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2121 #else 2122 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2123 #endif 2124 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2125 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2126 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2127 j = 0; 2128 while (j < temp_constraints && singular_vals[j] < tol) j++; 2129 total_counts = total_counts-j; 2130 valid_constraints = temp_constraints-j; 2131 /* scale and copy POD basis into used quadrature memory */ 2132 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2133 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2134 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2135 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2136 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2137 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2138 if (j<temp_constraints) { 2139 PetscInt ii; 2140 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2141 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2142 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2143 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2144 for (k=0;k<temp_constraints-j;k++) { 2145 for (ii=0;ii<size_of_constraint;ii++) { 2146 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2147 } 2148 } 2149 } 2150 #else /* on missing GESVD */ 2151 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2152 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2153 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2154 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2155 #if !defined(PETSC_USE_COMPLEX) 2156 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2157 #else 2158 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2159 #endif 2160 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2161 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2162 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2163 k = temp_constraints; 2164 if (k > size_of_constraint) k = size_of_constraint; 2165 j = 0; 2166 while (j < k && singular_vals[k-j-1] < tol) j++; 2167 valid_constraints = k-j; 2168 total_counts = total_counts-temp_constraints+valid_constraints; 2169 #endif /* on missing GESVD */ 2170 } 2171 } 2172 /* setting change_of_basis flag is safe now */ 2173 if (boolforchange) { 2174 for (j=0;j<valid_constraints;j++) { 2175 PetscBTSet(change_basis,total_counts-j-1); 2176 } 2177 } 2178 } 2179 /* free workspace */ 2180 if (!skip_lapack || pcbddc->use_qr_single) { 2181 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2182 } 2183 if (!skip_lapack) { 2184 ierr = PetscFree(work);CHKERRQ(ierr); 2185 #if defined(PETSC_USE_COMPLEX) 2186 ierr = PetscFree(rwork);CHKERRQ(ierr); 2187 #endif 2188 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2189 #if defined(PETSC_MISSING_LAPACK_GESVD) 2190 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2191 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2192 #endif 2193 } 2194 for (k=0;k<nnsp_size;k++) { 2195 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2196 } 2197 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2198 /* free index sets of faces, edges and vertices */ 2199 for (i=0;i<n_ISForFaces;i++) { 2200 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2201 } 2202 if (n_ISForFaces) { 2203 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2204 } 2205 for (i=0;i<n_ISForEdges;i++) { 2206 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2207 } 2208 if (n_ISForEdges) { 2209 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2210 } 2211 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2212 } else { 2213 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2214 PetscInt cum = 0; 2215 2216 total_counts = 0; 2217 n_vertices = 0; 2218 if (sub_schurs->is_Ej_com) { 2219 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2220 } 2221 max_constraints = 0; 2222 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2223 total_counts += pcbddc->adaptive_constraints_n[i]; 2224 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2225 } 2226 temp_indices = pcbddc->adaptive_constraints_ptrs; 2227 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2228 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2229 2230 #if 0 2231 printf("Found %d totals\n",total_counts); 2232 for (i=0;i<total_counts;i++) { 2233 printf("const %d, start %d",i,temp_indices[i]); 2234 printf(" end %d:\n",temp_indices[i+1]); 2235 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2236 printf(" idxs %d",temp_indices_to_constraint[j]); 2237 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2238 } 2239 } 2240 for (i=0;i<n_vertices;i++) { 2241 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]); 2242 } 2243 for (i=0;i<sub_schurs->n_subs;i++) { 2244 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2245 } 2246 #endif 2247 2248 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2249 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2250 /* Change of basis */ 2251 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2252 if (pcbddc->use_change_of_basis) { 2253 cum = n_vertices; 2254 for (i=0;i<sub_schurs->n_subs;i++) { 2255 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2256 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2257 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2258 } 2259 } 2260 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2261 } 2262 } 2263 } 2264 2265 /* map temp_indices_to_constraint in boundary numbering */ 2266 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2267 if (i != temp_indices[total_counts]) { 2268 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2269 } 2270 2271 /* set quantities in pcbddc data structure and store previous primal size */ 2272 /* n_vertices defines the number of subdomain corners in the primal space */ 2273 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2274 olocal_primal_size = pcbddc->local_primal_size; 2275 pcbddc->local_primal_size = total_counts; 2276 pcbddc->n_vertices = n_vertices; 2277 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2278 2279 /* Create constraint matrix */ 2280 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2281 /* so we need to set it up properly either with or without change of basis */ 2282 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2283 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2284 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2285 /* array to compute a local numbering of constraints : vertices first then constraints */ 2286 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2287 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2288 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2289 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2290 /* auxiliary stuff for basis change */ 2291 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2292 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2293 2294 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2295 total_primal_vertices=0; 2296 for (i=0;i<pcbddc->local_primal_size;i++) { 2297 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2298 if (size_of_constraint == 1) { 2299 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2300 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2301 aux_primal_minloc[total_primal_vertices]=0; 2302 total_primal_vertices++; 2303 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2304 PetscInt min_loc,min_index; 2305 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2306 /* find first untouched local node */ 2307 k = 0; 2308 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2309 min_index = global_indices[k]; 2310 min_loc = k; 2311 /* search the minimum among global nodes already untouched on the cc */ 2312 for (k=1;k<size_of_constraint;k++) { 2313 /* there can be more than one constraint on a single connected component */ 2314 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2315 min_index = global_indices[k]; 2316 min_loc = k; 2317 } 2318 } 2319 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2320 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2321 aux_primal_minloc[total_primal_vertices]=min_loc; 2322 total_primal_vertices++; 2323 } 2324 } 2325 /* determine if a QR strategy is needed for change of basis */ 2326 qr_needed = PETSC_FALSE; 2327 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2328 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2329 if (PetscBTLookup(change_basis,i)) { 2330 if (!pcbddc->use_qr_single) { 2331 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2332 j = 0; 2333 for (k=0;k<size_of_constraint;k++) { 2334 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2335 j++; 2336 } 2337 } 2338 /* found more than one primal dof on the cc */ 2339 if (j > 1) { 2340 PetscBTSet(qr_needed_idx,i); 2341 qr_needed = PETSC_TRUE; 2342 } 2343 } else { 2344 PetscBTSet(qr_needed_idx,i); 2345 qr_needed = PETSC_TRUE; 2346 } 2347 } 2348 } 2349 /* free workspace */ 2350 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2351 2352 /* permute indices in order to have a sorted set of vertices */ 2353 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2354 2355 /* nonzero structure of constraint matrix */ 2356 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2357 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2358 j=total_primal_vertices; 2359 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2360 if (!PetscBTLookup(change_basis,i)) { 2361 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2362 j++; 2363 } 2364 } 2365 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2366 ierr = PetscFree(nnz);CHKERRQ(ierr); 2367 /* set values in constraint matrix */ 2368 for (i=0;i<total_primal_vertices;i++) { 2369 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2370 } 2371 total_counts = total_primal_vertices; 2372 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2373 if (!PetscBTLookup(change_basis,i)) { 2374 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2375 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2376 total_counts++; 2377 } 2378 } 2379 /* assembling */ 2380 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2381 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2382 /* 2383 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2384 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2385 */ 2386 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2387 if (pcbddc->use_change_of_basis) { 2388 /* dual and primal dofs on a single cc */ 2389 PetscInt dual_dofs,primal_dofs; 2390 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2391 PetscInt primal_counter; 2392 /* working stuff for GEQRF */ 2393 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2394 PetscBLASInt lqr_work; 2395 /* working stuff for UNGQR */ 2396 PetscScalar *gqr_work,lgqr_work_t; 2397 PetscBLASInt lgqr_work; 2398 /* working stuff for TRTRS */ 2399 PetscScalar *trs_rhs; 2400 PetscBLASInt Blas_NRHS; 2401 /* pointers for values insertion into change of basis matrix */ 2402 PetscInt *start_rows,*start_cols; 2403 PetscScalar *start_vals; 2404 /* working stuff for values insertion */ 2405 PetscBT is_primal; 2406 /* matrix sizes */ 2407 PetscInt global_size,local_size; 2408 /* temporary change of basis */ 2409 Mat localChangeOfBasisMatrix; 2410 /* extra space for debugging */ 2411 PetscScalar *dbg_work; 2412 2413 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2414 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2415 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2416 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2417 /* nonzeros for local mat */ 2418 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2419 for (i=0;i<pcis->n;i++) nnz[i]=1; 2420 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2421 if (PetscBTLookup(change_basis,i)) { 2422 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2423 if (PetscBTLookup(qr_needed_idx,i)) { 2424 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2425 } else { 2426 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2; 2427 /* get local primal index on the cc */ 2428 j = 0; 2429 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2430 nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2431 } 2432 } 2433 } 2434 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2435 ierr = PetscFree(nnz);CHKERRQ(ierr); 2436 /* Set initial identity in the matrix */ 2437 for (i=0;i<pcis->n;i++) { 2438 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2439 } 2440 2441 if (pcbddc->dbg_flag) { 2442 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2443 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2444 } 2445 2446 2447 /* Now we loop on the constraints which need a change of basis */ 2448 /* 2449 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2450 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2451 2452 Basic blocks of change of basis matrix T computed by 2453 2454 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2455 2456 | 1 0 ... 0 s_1/S | 2457 | 0 1 ... 0 s_2/S | 2458 | ... | 2459 | 0 ... 1 s_{n-1}/S | 2460 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2461 2462 with S = \sum_{i=1}^n s_i^2 2463 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2464 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2465 2466 - QR decomposition of constraints otherwise 2467 */ 2468 if (qr_needed) { 2469 /* space to store Q */ 2470 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2471 /* first we issue queries for optimal work */ 2472 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2473 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2474 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2475 lqr_work = -1; 2476 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2477 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2478 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2479 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2480 lgqr_work = -1; 2481 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2482 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2483 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2484 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2485 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2486 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2487 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2488 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2489 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2490 /* array to store scaling factors for reflectors */ 2491 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2492 /* array to store rhs and solution of triangular solver */ 2493 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2494 /* allocating workspace for check */ 2495 if (pcbddc->dbg_flag) { 2496 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2497 } 2498 } 2499 /* array to store whether a node is primal or not */ 2500 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2501 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2502 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2503 if (i != total_primal_vertices) { 2504 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2505 } 2506 for (i=0;i<total_primal_vertices;i++) { 2507 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2508 } 2509 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2510 2511 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2512 /* -> using implicit ordering contained in temp_indices data */ 2513 total_counts = pcbddc->n_vertices; 2514 primal_counter = total_counts; 2515 while (total_counts<pcbddc->local_primal_size) { 2516 primal_dofs = 1; 2517 if (PetscBTLookup(change_basis,total_counts)) { 2518 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2519 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) { 2520 primal_dofs++; 2521 } 2522 /* get constraint info */ 2523 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2524 dual_dofs = size_of_constraint-primal_dofs; 2525 2526 if (pcbddc->dbg_flag) { 2527 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2528 } 2529 2530 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2531 2532 /* copy quadrature constraints for change of basis check */ 2533 if (pcbddc->dbg_flag) { 2534 ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2535 } 2536 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2537 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2538 2539 /* compute QR decomposition of constraints */ 2540 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2541 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2542 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2543 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2544 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2545 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2546 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2547 2548 /* explictly compute R^-T */ 2549 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2550 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2551 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2552 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2553 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2554 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2555 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2556 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2557 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2558 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2559 2560 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2561 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2562 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2563 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2564 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2565 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2566 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2567 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2568 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2569 2570 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2571 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2572 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2573 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2574 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2575 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2576 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2577 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2578 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2579 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2580 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2581 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2582 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2583 2584 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2585 start_rows = &temp_indices_to_constraint[temp_indices[total_counts]]; 2586 /* insert cols for primal dofs */ 2587 for (j=0;j<primal_dofs;j++) { 2588 start_vals = &qr_basis[j*size_of_constraint]; 2589 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2590 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2591 } 2592 /* insert cols for dual dofs */ 2593 for (j=0,k=0;j<dual_dofs;k++) { 2594 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2595 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2596 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k]; 2597 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2598 j++; 2599 } 2600 } 2601 2602 /* check change of basis */ 2603 if (pcbddc->dbg_flag) { 2604 PetscInt ii,jj; 2605 PetscBool valid_qr=PETSC_TRUE; 2606 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2607 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2608 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2609 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2610 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2611 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2612 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2613 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2614 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2615 for (jj=0;jj<size_of_constraint;jj++) { 2616 for (ii=0;ii<primal_dofs;ii++) { 2617 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2618 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2619 } 2620 } 2621 if (!valid_qr) { 2622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2623 for (jj=0;jj<size_of_constraint;jj++) { 2624 for (ii=0;ii<primal_dofs;ii++) { 2625 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2626 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2627 } 2628 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2629 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2630 } 2631 } 2632 } 2633 } else { 2634 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2635 } 2636 } 2637 } else { /* simple transformation block */ 2638 PetscInt row,col; 2639 PetscScalar val,norm; 2640 2641 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2642 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2643 for (j=0;j<size_of_constraint;j++) { 2644 PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2645 row = temp_indices_to_constraint[temp_indices[total_counts]+j]; 2646 if (!PetscBTLookup(is_primal,row_B)) { 2647 col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2648 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2649 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2650 } else { 2651 for (k=0;k<size_of_constraint;k++) { 2652 col = temp_indices_to_constraint[temp_indices[total_counts]+k]; 2653 if (row != col) { 2654 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2655 } else { 2656 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2657 } 2658 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2659 } 2660 } 2661 } 2662 if (pcbddc->dbg_flag) { 2663 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2664 } 2665 } 2666 /* increment primal counter */ 2667 primal_counter += primal_dofs; 2668 } else { 2669 if (pcbddc->dbg_flag) { 2670 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2671 } 2672 } 2673 /* increment constraint counter total_counts */ 2674 total_counts += primal_dofs; 2675 } 2676 2677 /* free workspace */ 2678 if (qr_needed) { 2679 if (pcbddc->dbg_flag) { 2680 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2681 } 2682 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2683 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2684 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2685 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2686 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2687 } 2688 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2689 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2690 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2691 2692 /* assembling of global change of variable */ 2693 { 2694 Mat tmat; 2695 PetscInt bs; 2696 2697 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2698 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2699 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2700 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2701 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2702 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2703 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2704 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2705 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2706 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2707 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2708 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2709 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2710 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2711 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2712 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2713 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2714 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2715 } 2716 /* check */ 2717 if (pcbddc->dbg_flag) { 2718 PetscReal error; 2719 Vec x,x_change; 2720 2721 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2722 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2723 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2724 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2725 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2726 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2727 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2728 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2729 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2730 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2731 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2732 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2733 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2734 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2735 ierr = VecDestroy(&x);CHKERRQ(ierr); 2736 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2737 } 2738 2739 /* adapt sub_schurs computed (if any) */ 2740 if (pcbddc->use_deluxe_scaling) { 2741 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2742 if (sub_schurs->n_subs_par_g) { 2743 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented"); 2744 } 2745 if (sub_schurs->S_Ej_all) { 2746 Mat S_1,S_2,tmat; 2747 IS is_all_N; 2748 2749 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2750 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr); 2751 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2752 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2753 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2754 sub_schurs->S_Ej_all = S_1; 2755 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2756 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2757 sub_schurs->sum_S_Ej_all = S_2; 2758 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2759 } 2760 } 2761 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2762 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2763 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2764 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2765 } 2766 2767 /* set up change of basis context */ 2768 if (pcbddc->ChangeOfBasisMatrix) { 2769 PCBDDCChange_ctx change_ctx; 2770 2771 if (!pcbddc->new_global_mat) { 2772 PetscInt global_size,local_size; 2773 2774 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2775 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2776 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2777 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2778 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2779 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2780 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2781 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2782 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2783 } else { 2784 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2785 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2786 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2787 } 2788 if (!pcbddc->user_ChangeOfBasisMatrix) { 2789 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2790 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2791 } else { 2792 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2793 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2794 } 2795 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2796 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2797 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2798 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2799 } 2800 2801 /* get indices in local ordering for vertices and constraints */ 2802 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2803 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2804 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2805 } 2806 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2807 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2808 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2809 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2810 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2811 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2812 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2813 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2814 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2815 /* set quantities in PCBDDC data struct */ 2816 pcbddc->n_actual_vertices = i; 2817 /* check if a new primal space has been introduced */ 2818 pcbddc->new_primal_space_local = PETSC_TRUE; 2819 if (olocal_primal_size == pcbddc->local_primal_size) { 2820 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2821 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2822 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2823 } 2824 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2825 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2826 2827 /* flush dbg viewer */ 2828 if (pcbddc->dbg_flag) { 2829 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2830 } 2831 2832 /* free workspace */ 2833 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2834 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2835 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2836 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2837 if (!pcbddc->adaptive_selection) { 2838 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2839 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2840 } else { 2841 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2842 pcbddc->adaptive_constraints_ptrs, 2843 pcbddc->adaptive_constraints_idxs, 2844 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2845 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2846 } 2847 PetscFunctionReturn(0); 2848 } 2849 2850 #undef __FUNCT__ 2851 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2852 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2853 { 2854 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2855 PC_IS *pcis = (PC_IS*)pc->data; 2856 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2857 PetscInt ierr,i,vertex_size; 2858 PetscViewer viewer=pcbddc->dbg_viewer; 2859 2860 PetscFunctionBegin; 2861 /* Reset previously computed graph */ 2862 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2863 /* Init local Graph struct */ 2864 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2865 2866 /* Check validity of the csr graph passed in by the user */ 2867 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2868 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2869 } 2870 2871 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2872 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2873 PetscInt *xadj,*adjncy; 2874 PetscInt nvtxs; 2875 PetscBool flg_row=PETSC_FALSE; 2876 2877 if (pcbddc->use_local_adj) { 2878 2879 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2880 if (flg_row) { 2881 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2882 pcbddc->computed_rowadj = PETSC_TRUE; 2883 } 2884 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2885 } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */ 2886 IS is_dummy; 2887 ISLocalToGlobalMapping l2gmap_dummy; 2888 PetscInt j,sum; 2889 PetscInt *cxadj,*cadjncy; 2890 const PetscInt *idxs; 2891 PCBDDCGraph graph; 2892 PetscBT is_on_boundary; 2893 2894 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2895 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2896 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2897 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2898 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2899 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2900 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2901 if (flg_row) { 2902 graph->xadj = xadj; 2903 graph->adjncy = adjncy; 2904 } 2905 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2906 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2907 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2908 2909 if (pcbddc->dbg_flag) { 2910 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2911 for (i=0;i<graph->ncc;i++) { 2912 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2913 } 2914 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2915 } 2916 2917 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 2918 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2919 for (i=0;i<pcis->n_B;i++) { 2920 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2921 } 2922 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2923 2924 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 2925 sum = 0; 2926 for (i=0;i<graph->ncc;i++) { 2927 PetscInt sizecc = 0; 2928 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2929 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2930 sizecc++; 2931 } 2932 } 2933 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2934 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2935 cxadj[graph->queue[j]] = sizecc; 2936 } 2937 } 2938 sum += sizecc*sizecc; 2939 } 2940 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2941 sum = 0; 2942 for (i=0;i<pcis->n;i++) { 2943 PetscInt temp = cxadj[i]; 2944 cxadj[i] = sum; 2945 sum += temp; 2946 } 2947 cxadj[pcis->n] = sum; 2948 for (i=0;i<graph->ncc;i++) { 2949 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2950 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2951 PetscInt k,sizecc = 0; 2952 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2953 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2954 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2955 sizecc++; 2956 } 2957 } 2958 } 2959 } 2960 } 2961 if (pcis->n) { 2962 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2963 } else { 2964 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2965 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2966 } 2967 graph->xadj = 0; 2968 graph->adjncy = 0; 2969 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2970 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2971 } 2972 } 2973 2974 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2975 vertex_size = 1; 2976 if (pcbddc->user_provided_isfordofs) { 2977 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2978 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2979 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2980 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2981 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2982 } 2983 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2984 pcbddc->n_ISForDofs = 0; 2985 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2986 } 2987 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2988 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2989 } else { 2990 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2991 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2992 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2993 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2994 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2995 } 2996 } 2997 } 2998 2999 /* Setup of Graph */ 3000 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3001 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3002 } 3003 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3004 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3005 } 3006 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 3007 3008 /* Graph's connected components analysis */ 3009 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3010 3011 /* print some info to stdout */ 3012 if (pcbddc->dbg_flag) { 3013 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 3014 } 3015 3016 /* mark topography has done */ 3017 pcbddc->recompute_topography = PETSC_FALSE; 3018 PetscFunctionReturn(0); 3019 } 3020 3021 #undef __FUNCT__ 3022 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 3023 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 3024 { 3025 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3026 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 3027 PetscErrorCode ierr; 3028 3029 PetscFunctionBegin; 3030 n = 0; 3031 vertices = 0; 3032 if (pcbddc->ConstraintMatrix) { 3033 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 3034 for (i=0;i<local_primal_size;i++) { 3035 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3036 if (size_of_constraint == 1) n++; 3037 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3038 } 3039 if (vertices_idx) { 3040 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 3041 n = 0; 3042 for (i=0;i<local_primal_size;i++) { 3043 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3044 if (size_of_constraint == 1) { 3045 vertices[n++]=row_cmat_indices[0]; 3046 } 3047 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3048 } 3049 } 3050 } 3051 *n_vertices = n; 3052 if (vertices_idx) *vertices_idx = vertices; 3053 PetscFunctionReturn(0); 3054 } 3055 3056 #undef __FUNCT__ 3057 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 3058 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 3059 { 3060 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3061 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 3062 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 3063 PetscBT touched; 3064 PetscErrorCode ierr; 3065 3066 /* This function assumes that the number of local constraints per connected component 3067 is not greater than the number of nodes defined for the connected component 3068 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3069 PetscFunctionBegin; 3070 n = 0; 3071 constraints_index = 0; 3072 if (pcbddc->ConstraintMatrix) { 3073 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3074 max_size_of_constraint = 0; 3075 for (i=0;i<local_primal_size;i++) { 3076 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3077 if (size_of_constraint > 1) { 3078 n++; 3079 } 3080 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3081 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3082 } 3083 if (constraints_idx) { 3084 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3085 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3086 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3087 n = 0; 3088 for (i=0;i<local_primal_size;i++) { 3089 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3090 if (size_of_constraint > 1) { 3091 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3092 /* find first untouched local node */ 3093 j = 0; 3094 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3095 min_index = row_cmat_global_indices[j]; 3096 min_loc = j; 3097 /* search the minimum among nodes not yet touched on the connected component 3098 since there can be more than one constraint on a single cc */ 3099 for (j=1;j<size_of_constraint;j++) { 3100 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3101 min_index = row_cmat_global_indices[j]; 3102 min_loc = j; 3103 } 3104 } 3105 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3106 constraints_index[n++] = row_cmat_indices[min_loc]; 3107 } 3108 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3109 } 3110 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3111 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3112 } 3113 } 3114 *n_constraints = n; 3115 if (constraints_idx) *constraints_idx = constraints_index; 3116 PetscFunctionReturn(0); 3117 } 3118 3119 #undef __FUNCT__ 3120 #define __FUNCT__ "PCBDDCSubsetNumbering" 3121 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3122 { 3123 Vec local_vec,global_vec; 3124 IS seqis,paris; 3125 VecScatter scatter_ctx; 3126 PetscScalar *array; 3127 PetscInt *temp_global_dofs; 3128 PetscScalar globalsum; 3129 PetscInt i,j,s; 3130 PetscInt nlocals,first_index,old_index,max_local,max_global; 3131 PetscMPIInt rank_prec_comm,size_prec_comm; 3132 PetscInt *dof_sizes,*dof_displs; 3133 PetscBool first_found; 3134 PetscErrorCode ierr; 3135 3136 PetscFunctionBegin; 3137 /* mpi buffers */ 3138 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3139 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3140 j = ( !rank_prec_comm ? size_prec_comm : 0); 3141 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 3142 /* get maximum size of subset */ 3143 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3144 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3145 max_local = 0; 3146 for (i=0;i<n_local_dofs;i++) { 3147 if (max_local < temp_global_dofs[i] ) { 3148 max_local = temp_global_dofs[i]; 3149 } 3150 } 3151 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3152 max_global++; 3153 max_local = 0; 3154 for (i=0;i<n_local_dofs;i++) { 3155 if (max_local < local_dofs[i] ) { 3156 max_local = local_dofs[i]; 3157 } 3158 } 3159 max_local++; 3160 /* allocate workspace */ 3161 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3162 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3163 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3164 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3165 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3166 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3167 /* create scatter */ 3168 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3169 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3170 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3171 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3172 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3173 /* init array */ 3174 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3175 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3176 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3177 if (local_dofs_mult) { 3178 for (i=0;i<n_local_dofs;i++) { 3179 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3180 } 3181 } else { 3182 for (i=0;i<n_local_dofs;i++) { 3183 array[local_dofs[i]]=1.0; 3184 } 3185 } 3186 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3187 /* scatter into global vec and get total number of global dofs */ 3188 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3189 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3190 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3191 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3192 /* Fill global_vec with cumulative function for global numbering */ 3193 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3194 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3195 nlocals = 0; 3196 first_index = -1; 3197 first_found = PETSC_FALSE; 3198 for (i=0;i<s;i++) { 3199 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3200 first_found = PETSC_TRUE; 3201 first_index = i; 3202 } 3203 nlocals += (PetscInt)PetscRealPart(array[i]); 3204 } 3205 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3206 if (!rank_prec_comm) { 3207 dof_displs[0]=0; 3208 for (i=1;i<size_prec_comm;i++) { 3209 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3210 } 3211 } 3212 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3213 if (first_found) { 3214 array[first_index] += (PetscScalar)nlocals; 3215 old_index = first_index; 3216 for (i=first_index+1;i<s;i++) { 3217 if (PetscRealPart(array[i]) > 0.1) { 3218 array[i] += array[old_index]; 3219 old_index = i; 3220 } 3221 } 3222 } 3223 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3224 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3225 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3226 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3227 /* get global ordering of local dofs */ 3228 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3229 if (local_dofs_mult) { 3230 for (i=0;i<n_local_dofs;i++) { 3231 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3232 } 3233 } else { 3234 for (i=0;i<n_local_dofs;i++) { 3235 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3236 } 3237 } 3238 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3239 /* free workspace */ 3240 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3241 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3242 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3243 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 3244 /* return pointer to global ordering of local dofs */ 3245 *global_numbering_subset = temp_global_dofs; 3246 PetscFunctionReturn(0); 3247 } 3248 3249 #undef __FUNCT__ 3250 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3251 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3252 { 3253 PetscInt i,j; 3254 PetscScalar *alphas; 3255 PetscErrorCode ierr; 3256 3257 PetscFunctionBegin; 3258 /* this implements stabilized Gram-Schmidt */ 3259 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3260 for (i=0;i<n;i++) { 3261 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3262 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3263 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3264 } 3265 ierr = PetscFree(alphas);CHKERRQ(ierr); 3266 PetscFunctionReturn(0); 3267 } 3268 3269 #undef __FUNCT__ 3270 #define __FUNCT__ "MatISGetSubassemblingPattern" 3271 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3272 { 3273 Mat subdomain_adj; 3274 IS new_ranks,ranks_send_to; 3275 MatPartitioning partitioner; 3276 Mat_IS *matis; 3277 PetscInt n_neighs,*neighs,*n_shared,**shared; 3278 PetscInt prank; 3279 PetscMPIInt size,rank,color; 3280 PetscInt *xadj,*adjncy,*oldranks; 3281 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3282 PetscInt i,local_size,threshold=0; 3283 PetscErrorCode ierr; 3284 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3285 PetscSubcomm subcomm; 3286 3287 PetscFunctionBegin; 3288 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3289 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3290 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3291 3292 /* Get info on mapping */ 3293 matis = (Mat_IS*)(mat->data); 3294 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3295 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3296 3297 /* build local CSR graph of subdomains' connectivity */ 3298 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3299 xadj[0] = 0; 3300 xadj[1] = PetscMax(n_neighs-1,0); 3301 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3302 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3303 3304 if (threshold) { 3305 PetscInt xadj_count = 0; 3306 for (i=1;i<n_neighs;i++) { 3307 if (n_shared[i] > threshold) { 3308 adjncy[xadj_count] = neighs[i]; 3309 adjncy_wgt[xadj_count] = n_shared[i]; 3310 xadj_count++; 3311 } 3312 } 3313 xadj[1] = xadj_count; 3314 } else { 3315 if (xadj[1]) { 3316 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3317 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3318 } 3319 } 3320 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3321 if (use_square) { 3322 for (i=0;i<xadj[1];i++) { 3323 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3324 } 3325 } 3326 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3327 3328 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3329 3330 /* 3331 Restrict work on active processes only. 3332 */ 3333 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3334 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3335 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3336 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3337 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3338 if (color) { 3339 ierr = PetscFree(xadj);CHKERRQ(ierr); 3340 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3341 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3342 } else { 3343 PetscInt coarsening_ratio; 3344 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3345 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3346 prank = rank; 3347 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3348 /* 3349 for (i=0;i<size;i++) { 3350 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3351 } 3352 */ 3353 for (i=0;i<xadj[1];i++) { 3354 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3355 } 3356 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3357 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3358 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3359 3360 /* Partition */ 3361 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3362 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3363 if (use_vwgt) { 3364 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3365 v_wgt[0] = local_size; 3366 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3367 } 3368 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3369 coarsening_ratio = size/n_subdomains; 3370 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3371 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3372 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3373 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3374 3375 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3376 if (contiguous) { 3377 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3378 } else { 3379 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3380 } 3381 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3382 /* clean up */ 3383 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3384 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3385 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3386 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3387 } 3388 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3389 3390 /* assemble parallel IS for sends */ 3391 i = 1; 3392 if (color) i=0; 3393 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3394 3395 /* get back IS */ 3396 *is_sends = ranks_send_to; 3397 PetscFunctionReturn(0); 3398 } 3399 3400 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3401 3402 #undef __FUNCT__ 3403 #define __FUNCT__ "MatISSubassemble" 3404 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3405 { 3406 Mat local_mat; 3407 Mat_IS *matis; 3408 IS is_sends_internal; 3409 PetscInt rows,cols,new_local_rows; 3410 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3411 PetscBool ismatis,isdense,newisdense,destroy_mat; 3412 ISLocalToGlobalMapping l2gmap; 3413 PetscInt* l2gmap_indices; 3414 const PetscInt* is_indices; 3415 MatType new_local_type; 3416 /* buffers */ 3417 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3418 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3419 PetscInt *recv_buffer_idxs_local; 3420 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3421 /* MPI */ 3422 MPI_Comm comm,comm_n; 3423 PetscSubcomm subcomm; 3424 PetscMPIInt n_sends,n_recvs,commsize; 3425 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3426 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3427 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3428 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3429 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3430 PetscErrorCode ierr; 3431 3432 PetscFunctionBegin; 3433 /* TODO: add missing checks */ 3434 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3435 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3436 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3437 PetscValidLogicalCollectiveInt(mat,nis,7); 3438 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3439 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3440 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3441 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3442 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3443 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3444 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3445 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3446 PetscInt mrows,mcols,mnrows,mncols; 3447 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3448 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3449 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3450 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3451 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3452 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3453 } 3454 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3455 PetscValidLogicalCollectiveInt(mat,bs,0); 3456 /* prepare IS for sending if not provided */ 3457 if (!is_sends) { 3458 PetscBool pcontig = PETSC_TRUE; 3459 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3460 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3461 } else { 3462 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3463 is_sends_internal = is_sends; 3464 } 3465 3466 /* get pointer of MATIS data */ 3467 matis = (Mat_IS*)mat->data; 3468 3469 /* get comm */ 3470 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3471 3472 /* compute number of sends */ 3473 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3474 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3475 3476 /* compute number of receives */ 3477 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3478 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3479 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3480 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3481 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3482 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3483 ierr = PetscFree(iflags);CHKERRQ(ierr); 3484 3485 /* restrict comm if requested */ 3486 subcomm = 0; 3487 destroy_mat = PETSC_FALSE; 3488 if (restrict_comm) { 3489 PetscMPIInt color,subcommsize; 3490 3491 color = 0; 3492 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3493 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3494 subcommsize = commsize - subcommsize; 3495 /* check if reuse has been requested */ 3496 if (reuse == MAT_REUSE_MATRIX) { 3497 if (*mat_n) { 3498 PetscMPIInt subcommsize2; 3499 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3500 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3501 comm_n = PetscObjectComm((PetscObject)*mat_n); 3502 } else { 3503 comm_n = PETSC_COMM_SELF; 3504 } 3505 } else { /* MAT_INITIAL_MATRIX */ 3506 PetscMPIInt rank; 3507 3508 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3509 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3510 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3511 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3512 comm_n = PetscSubcommChild(subcomm); 3513 } 3514 /* flag to destroy *mat_n if not significative */ 3515 if (color) destroy_mat = PETSC_TRUE; 3516 } else { 3517 comm_n = comm; 3518 } 3519 3520 /* prepare send/receive buffers */ 3521 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3522 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3523 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3524 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3525 if (nis) { 3526 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3527 } 3528 3529 /* Get data from local matrices */ 3530 if (!isdense) { 3531 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3532 /* TODO: See below some guidelines on how to prepare the local buffers */ 3533 /* 3534 send_buffer_vals should contain the raw values of the local matrix 3535 send_buffer_idxs should contain: 3536 - MatType_PRIVATE type 3537 - PetscInt size_of_l2gmap 3538 - PetscInt global_row_indices[size_of_l2gmap] 3539 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3540 */ 3541 } else { 3542 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3543 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3544 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3545 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3546 send_buffer_idxs[1] = i; 3547 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3548 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3549 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3550 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3551 for (i=0;i<n_sends;i++) { 3552 ilengths_vals[is_indices[i]] = len*len; 3553 ilengths_idxs[is_indices[i]] = len+2; 3554 } 3555 } 3556 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3557 /* additional is (if any) */ 3558 if (nis) { 3559 PetscMPIInt psum; 3560 PetscInt j; 3561 for (j=0,psum=0;j<nis;j++) { 3562 PetscInt plen; 3563 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3564 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3565 psum += len+1; /* indices + lenght */ 3566 } 3567 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3568 for (j=0,psum=0;j<nis;j++) { 3569 PetscInt plen; 3570 const PetscInt *is_array_idxs; 3571 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3572 send_buffer_idxs_is[psum] = plen; 3573 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3574 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3575 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3576 psum += plen+1; /* indices + lenght */ 3577 } 3578 for (i=0;i<n_sends;i++) { 3579 ilengths_idxs_is[is_indices[i]] = psum; 3580 } 3581 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3582 } 3583 3584 buf_size_idxs = 0; 3585 buf_size_vals = 0; 3586 buf_size_idxs_is = 0; 3587 for (i=0;i<n_recvs;i++) { 3588 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3589 buf_size_vals += (PetscInt)olengths_vals[i]; 3590 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3591 } 3592 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3593 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3594 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3595 3596 /* get new tags for clean communications */ 3597 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3598 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3599 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3600 3601 /* allocate for requests */ 3602 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3603 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3604 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3605 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3606 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3607 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3608 3609 /* communications */ 3610 ptr_idxs = recv_buffer_idxs; 3611 ptr_vals = recv_buffer_vals; 3612 ptr_idxs_is = recv_buffer_idxs_is; 3613 for (i=0;i<n_recvs;i++) { 3614 source_dest = onodes[i]; 3615 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3616 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3617 ptr_idxs += olengths_idxs[i]; 3618 ptr_vals += olengths_vals[i]; 3619 if (nis) { 3620 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3621 ptr_idxs_is += olengths_idxs_is[i]; 3622 } 3623 } 3624 for (i=0;i<n_sends;i++) { 3625 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3626 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3627 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3628 if (nis) { 3629 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3630 } 3631 } 3632 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3633 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3634 3635 /* assemble new l2g map */ 3636 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3637 ptr_idxs = recv_buffer_idxs; 3638 new_local_rows = 0; 3639 for (i=0;i<n_recvs;i++) { 3640 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3641 ptr_idxs += olengths_idxs[i]; 3642 } 3643 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3644 ptr_idxs = recv_buffer_idxs; 3645 new_local_rows = 0; 3646 for (i=0;i<n_recvs;i++) { 3647 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3648 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3649 ptr_idxs += olengths_idxs[i]; 3650 } 3651 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3652 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3653 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3654 3655 /* infer new local matrix type from received local matrices type */ 3656 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3657 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3658 if (n_recvs) { 3659 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3660 ptr_idxs = recv_buffer_idxs; 3661 for (i=0;i<n_recvs;i++) { 3662 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3663 new_local_type_private = MATAIJ_PRIVATE; 3664 break; 3665 } 3666 ptr_idxs += olengths_idxs[i]; 3667 } 3668 switch (new_local_type_private) { 3669 case MATDENSE_PRIVATE: 3670 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3671 new_local_type = MATSEQAIJ; 3672 bs = 1; 3673 } else { /* if I receive only 1 dense matrix */ 3674 new_local_type = MATSEQDENSE; 3675 bs = 1; 3676 } 3677 break; 3678 case MATAIJ_PRIVATE: 3679 new_local_type = MATSEQAIJ; 3680 bs = 1; 3681 break; 3682 case MATBAIJ_PRIVATE: 3683 new_local_type = MATSEQBAIJ; 3684 break; 3685 case MATSBAIJ_PRIVATE: 3686 new_local_type = MATSEQSBAIJ; 3687 break; 3688 default: 3689 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3690 break; 3691 } 3692 } else { /* by default, new_local_type is seqdense */ 3693 new_local_type = MATSEQDENSE; 3694 bs = 1; 3695 } 3696 3697 /* create MATIS object if needed */ 3698 if (reuse == MAT_INITIAL_MATRIX) { 3699 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3700 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3701 } else { 3702 /* it also destroys the local matrices */ 3703 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3704 } 3705 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3706 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3707 3708 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3709 3710 /* Global to local map of received indices */ 3711 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3712 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3713 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3714 3715 /* restore attributes -> type of incoming data and its size */ 3716 buf_size_idxs = 0; 3717 for (i=0;i<n_recvs;i++) { 3718 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3719 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3720 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3721 } 3722 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3723 3724 /* set preallocation */ 3725 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3726 if (!newisdense) { 3727 PetscInt *new_local_nnz=0; 3728 3729 ptr_vals = recv_buffer_vals; 3730 ptr_idxs = recv_buffer_idxs_local; 3731 if (n_recvs) { 3732 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3733 } 3734 for (i=0;i<n_recvs;i++) { 3735 PetscInt j; 3736 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3737 for (j=0;j<*(ptr_idxs+1);j++) { 3738 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3739 } 3740 } else { 3741 /* TODO */ 3742 } 3743 ptr_idxs += olengths_idxs[i]; 3744 } 3745 if (new_local_nnz) { 3746 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3747 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3748 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3749 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3750 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3751 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3752 } else { 3753 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3754 } 3755 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3756 } else { 3757 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3758 } 3759 3760 /* set values */ 3761 ptr_vals = recv_buffer_vals; 3762 ptr_idxs = recv_buffer_idxs_local; 3763 for (i=0;i<n_recvs;i++) { 3764 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3765 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3766 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3767 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3768 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3769 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3770 } else { 3771 /* TODO */ 3772 } 3773 ptr_idxs += olengths_idxs[i]; 3774 ptr_vals += olengths_vals[i]; 3775 } 3776 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3777 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3778 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3779 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3780 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3781 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3782 3783 #if 0 3784 if (!restrict_comm) { /* check */ 3785 Vec lvec,rvec; 3786 PetscReal infty_error; 3787 3788 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3789 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3790 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3791 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3792 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3793 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3794 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3795 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3796 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3797 } 3798 #endif 3799 3800 /* assemble new additional is (if any) */ 3801 if (nis) { 3802 PetscInt **temp_idxs,*count_is,j,psum; 3803 3804 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3805 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3806 ptr_idxs = recv_buffer_idxs_is; 3807 psum = 0; 3808 for (i=0;i<n_recvs;i++) { 3809 for (j=0;j<nis;j++) { 3810 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3811 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3812 psum += plen; 3813 ptr_idxs += plen+1; /* shift pointer to received data */ 3814 } 3815 } 3816 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3817 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3818 for (i=1;i<nis;i++) { 3819 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3820 } 3821 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3822 ptr_idxs = recv_buffer_idxs_is; 3823 for (i=0;i<n_recvs;i++) { 3824 for (j=0;j<nis;j++) { 3825 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3826 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3827 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3828 ptr_idxs += plen+1; /* shift pointer to received data */ 3829 } 3830 } 3831 for (i=0;i<nis;i++) { 3832 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3833 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3834 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3835 } 3836 ierr = PetscFree(count_is);CHKERRQ(ierr); 3837 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3838 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3839 } 3840 /* free workspace */ 3841 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3842 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3843 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3844 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3845 if (isdense) { 3846 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3847 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3848 } else { 3849 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3850 } 3851 if (nis) { 3852 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3853 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3854 } 3855 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3856 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3857 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3858 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3859 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3860 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3861 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3862 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3863 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3864 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3865 ierr = PetscFree(onodes);CHKERRQ(ierr); 3866 if (nis) { 3867 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3868 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3869 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3870 } 3871 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3872 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3873 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3874 for (i=0;i<nis;i++) { 3875 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3876 } 3877 } 3878 PetscFunctionReturn(0); 3879 } 3880 3881 /* temporary hack into ksp private data structure */ 3882 #include <petsc-private/kspimpl.h> 3883 3884 #undef __FUNCT__ 3885 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3886 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3887 { 3888 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3889 PC_IS *pcis = (PC_IS*)pc->data; 3890 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3891 MatNullSpace CoarseNullSpace=NULL; 3892 ISLocalToGlobalMapping coarse_islg; 3893 IS coarse_is,*isarray; 3894 PetscInt i,im_active=-1,active_procs=-1; 3895 PetscInt nis,nisdofs,nisneu; 3896 PC pc_temp; 3897 PCType coarse_pc_type; 3898 KSPType coarse_ksp_type; 3899 PetscBool multilevel_requested,multilevel_allowed; 3900 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3901 Mat t_coarse_mat_is; 3902 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3903 PetscMPIInt all_procs; 3904 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3905 PetscBool compute_vecs = PETSC_FALSE; 3906 PetscScalar *array; 3907 PetscErrorCode ierr; 3908 3909 PetscFunctionBegin; 3910 /* Assign global numbering to coarse dofs */ 3911 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3912 PetscInt ocoarse_size; 3913 compute_vecs = PETSC_TRUE; 3914 ocoarse_size = pcbddc->coarse_size; 3915 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3916 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3917 /* see if we can avoid some work */ 3918 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3919 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3920 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3921 coarse_reuse = PETSC_FALSE; 3922 } else { /* we can safely reuse already computed coarse matrix */ 3923 coarse_reuse = PETSC_TRUE; 3924 } 3925 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3926 coarse_reuse = PETSC_FALSE; 3927 } 3928 /* reset any subassembling information */ 3929 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3930 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3931 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3932 coarse_reuse = PETSC_TRUE; 3933 } 3934 3935 /* count "active" (i.e. with positive local size) and "void" processes */ 3936 im_active = !!(pcis->n); 3937 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3938 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3939 void_procs = all_procs-active_procs; 3940 csin_type_simple = PETSC_TRUE; 3941 redist = PETSC_FALSE; 3942 if (pcbddc->current_level && void_procs) { 3943 csin_ml = PETSC_TRUE; 3944 ncoarse_ml = void_procs; 3945 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3946 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3947 csin_ds = PETSC_TRUE; 3948 ncoarse_ds = pcbddc->redistribute_coarse; 3949 redist = PETSC_TRUE; 3950 } else { 3951 csin_ds = PETSC_TRUE; 3952 ncoarse_ds = active_procs; 3953 redist = PETSC_TRUE; 3954 } 3955 } else { 3956 csin_ml = PETSC_FALSE; 3957 ncoarse_ml = all_procs; 3958 if (void_procs) { 3959 csin_ds = PETSC_TRUE; 3960 ncoarse_ds = void_procs; 3961 csin_type_simple = PETSC_FALSE; 3962 } else { 3963 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3964 csin_ds = PETSC_TRUE; 3965 ncoarse_ds = pcbddc->redistribute_coarse; 3966 redist = PETSC_TRUE; 3967 } else { 3968 csin_ds = PETSC_FALSE; 3969 ncoarse_ds = all_procs; 3970 } 3971 } 3972 } 3973 3974 /* 3975 test if we can go multilevel: three conditions must be satisfied: 3976 - we have not exceeded the number of levels requested 3977 - we can actually subassemble the active processes 3978 - we can find a suitable number of MPI processes where we can place the subassembled problem 3979 */ 3980 multilevel_allowed = PETSC_FALSE; 3981 multilevel_requested = PETSC_FALSE; 3982 if (pcbddc->current_level < pcbddc->max_levels) { 3983 multilevel_requested = PETSC_TRUE; 3984 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3985 multilevel_allowed = PETSC_FALSE; 3986 } else { 3987 multilevel_allowed = PETSC_TRUE; 3988 } 3989 } 3990 /* determine number of process partecipating to coarse solver */ 3991 if (multilevel_allowed) { 3992 ncoarse = ncoarse_ml; 3993 csin = csin_ml; 3994 redist = PETSC_FALSE; 3995 } else { 3996 ncoarse = ncoarse_ds; 3997 csin = csin_ds; 3998 } 3999 4000 /* creates temporary l2gmap and IS for coarse indexes */ 4001 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4002 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4003 4004 /* creates temporary MATIS object for coarse matrix */ 4005 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4006 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4007 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4008 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4009 #if 0 4010 { 4011 PetscViewer viewer; 4012 char filename[256]; 4013 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4014 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4015 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4016 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4017 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4018 } 4019 #endif 4020 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4021 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4022 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4023 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4024 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4025 4026 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4027 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4028 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4029 const PetscInt *idxs; 4030 ISLocalToGlobalMapping tmap; 4031 4032 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4033 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4034 /* allocate space for temporary storage */ 4035 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4036 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4037 /* allocate for IS array */ 4038 nisdofs = pcbddc->n_ISForDofsLocal; 4039 nisneu = !!pcbddc->NeumannBoundariesLocal; 4040 nis = nisdofs + nisneu; 4041 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4042 /* dofs splitting */ 4043 for (i=0;i<nisdofs;i++) { 4044 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4045 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4046 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4047 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4048 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4049 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4050 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4051 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4052 } 4053 /* neumann boundaries */ 4054 if (pcbddc->NeumannBoundariesLocal) { 4055 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4056 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4057 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4058 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4059 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4060 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4061 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4062 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4063 } 4064 /* free memory */ 4065 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4066 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4067 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4068 } else { 4069 nis = 0; 4070 nisdofs = 0; 4071 nisneu = 0; 4072 isarray = NULL; 4073 } 4074 /* destroy no longer needed map */ 4075 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4076 4077 /* restrict on coarse candidates (if needed) */ 4078 coarse_mat_is = NULL; 4079 if (csin) { 4080 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4081 if (redist) { 4082 PetscMPIInt rank; 4083 PetscInt spc,n_spc_p1,dest[1],destsize; 4084 4085 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4086 spc = active_procs/ncoarse; 4087 n_spc_p1 = active_procs%ncoarse; 4088 if (im_active) { 4089 destsize = 1; 4090 if (rank > n_spc_p1*(spc+1)-1) { 4091 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4092 } else { 4093 dest[0] = rank/(spc+1); 4094 } 4095 } else { 4096 destsize = 0; 4097 } 4098 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4099 } else if (csin_type_simple) { 4100 PetscMPIInt rank; 4101 PetscInt issize,isidx; 4102 4103 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4104 if (im_active) { 4105 issize = 1; 4106 isidx = (PetscInt)rank; 4107 } else { 4108 issize = 0; 4109 isidx = -1; 4110 } 4111 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4112 } else { /* get a suitable subassembling pattern from MATIS code */ 4113 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4114 } 4115 4116 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4117 if (!redist || ncoarse <= void_procs) { 4118 PetscInt ncoarse_cand,tissize,*nisindices; 4119 PetscInt *coarse_candidates; 4120 const PetscInt* tisindices; 4121 4122 /* get coarse candidates' ranks in pc communicator */ 4123 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4124 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4125 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4126 if (!coarse_candidates[i]) { 4127 coarse_candidates[ncoarse_cand++]=i; 4128 } 4129 } 4130 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4131 4132 4133 if (pcbddc->dbg_flag) { 4134 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4135 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4136 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4137 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4138 for (i=0;i<ncoarse_cand;i++) { 4139 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4140 } 4141 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4142 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4143 } 4144 /* shift the pattern on coarse candidates */ 4145 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4146 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4147 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4148 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4149 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4150 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4151 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4152 } 4153 if (pcbddc->dbg_flag) { 4154 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4155 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4156 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4157 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4158 } 4159 } 4160 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4161 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4162 } else { 4163 if (pcbddc->dbg_flag) { 4164 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4165 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4166 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4167 } 4168 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4169 coarse_mat_is = t_coarse_mat_is; 4170 } 4171 4172 /* create local to global scatters for coarse problem */ 4173 if (compute_vecs) { 4174 PetscInt lrows; 4175 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4176 if (coarse_mat_is) { 4177 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4178 } else { 4179 lrows = 0; 4180 } 4181 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4182 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4183 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4184 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4185 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4186 } 4187 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4188 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4189 4190 /* set defaults for coarse KSP and PC */ 4191 if (multilevel_allowed) { 4192 coarse_ksp_type = KSPRICHARDSON; 4193 coarse_pc_type = PCBDDC; 4194 } else { 4195 coarse_ksp_type = KSPPREONLY; 4196 coarse_pc_type = PCREDUNDANT; 4197 } 4198 4199 /* print some info if requested */ 4200 if (pcbddc->dbg_flag) { 4201 if (!multilevel_allowed) { 4202 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4203 if (multilevel_requested) { 4204 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4205 } else if (pcbddc->max_levels) { 4206 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4207 } 4208 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4209 } 4210 } 4211 4212 /* create the coarse KSP object only once with defaults */ 4213 if (coarse_mat_is) { 4214 MatReuse coarse_mat_reuse; 4215 PetscViewer dbg_viewer = NULL; 4216 if (pcbddc->dbg_flag) { 4217 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4218 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4219 } 4220 if (!pcbddc->coarse_ksp) { 4221 char prefix[256],str_level[16]; 4222 size_t len; 4223 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4224 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4225 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4226 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4227 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4228 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4229 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4230 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4231 /* prefix */ 4232 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4233 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4234 if (!pcbddc->current_level) { 4235 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4236 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4237 } else { 4238 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4239 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4240 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4241 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4242 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4243 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4244 } 4245 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4246 /* allow user customization */ 4247 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4248 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4249 } 4250 4251 /* get some info after set from options */ 4252 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4253 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4254 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4255 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4256 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4257 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4258 isbddc = PETSC_FALSE; 4259 } 4260 if (isredundant) { 4261 KSP inner_ksp; 4262 PC inner_pc; 4263 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4264 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4265 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4266 } 4267 4268 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4269 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4270 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4271 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4272 if (nisdofs) { 4273 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4274 for (i=0;i<nisdofs;i++) { 4275 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4276 } 4277 } 4278 if (nisneu) { 4279 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4280 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4281 } 4282 4283 /* assemble coarse matrix */ 4284 if (coarse_reuse) { 4285 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4286 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4287 coarse_mat_reuse = MAT_REUSE_MATRIX; 4288 } else { 4289 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4290 } 4291 if (isbddc || isnn) { 4292 if (pcbddc->coarsening_ratio > 1) { 4293 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4294 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4295 if (pcbddc->dbg_flag) { 4296 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4297 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4298 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4299 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4300 } 4301 } 4302 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4303 } else { 4304 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4305 coarse_mat = coarse_mat_is; 4306 } 4307 } else { 4308 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4309 } 4310 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4311 4312 /* propagate symmetry info to coarse matrix */ 4313 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4314 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4315 4316 /* set operators */ 4317 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4318 if (pcbddc->dbg_flag) { 4319 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4320 } 4321 } else { /* processes non partecipating to coarse solver (if any) */ 4322 coarse_mat = 0; 4323 } 4324 ierr = PetscFree(isarray);CHKERRQ(ierr); 4325 #if 0 4326 { 4327 PetscViewer viewer; 4328 char filename[256]; 4329 sprintf(filename,"coarse_mat.m"); 4330 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4331 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4332 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4333 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4334 } 4335 #endif 4336 4337 /* Compute coarse null space (special handling by BDDC only) */ 4338 if (pcbddc->NullSpace) { 4339 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4340 } 4341 4342 if (pcbddc->coarse_ksp) { 4343 Vec crhs,csol; 4344 PetscBool ispreonly; 4345 if (CoarseNullSpace) { 4346 if (isbddc) { 4347 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4348 } else { 4349 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4350 } 4351 } 4352 /* setup coarse ksp */ 4353 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4354 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4355 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4356 /* hack */ 4357 if (!csol) { 4358 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4359 } 4360 if (!crhs) { 4361 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4362 } 4363 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4364 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4365 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4366 KSP check_ksp; 4367 KSPType check_ksp_type; 4368 PC check_pc; 4369 Vec check_vec,coarse_vec; 4370 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4371 PetscInt its; 4372 PetscBool compute_eigs; 4373 PetscReal *eigs_r,*eigs_c; 4374 PetscInt neigs; 4375 const char *prefix; 4376 4377 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4378 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4379 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4380 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4381 if (ispreonly) { 4382 check_ksp_type = KSPPREONLY; 4383 compute_eigs = PETSC_FALSE; 4384 } else { 4385 check_ksp_type = KSPGMRES; 4386 compute_eigs = PETSC_TRUE; 4387 } 4388 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4389 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4390 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4391 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4392 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4393 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4394 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4395 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4396 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4397 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4398 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4399 /* create random vec */ 4400 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4401 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4402 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4403 if (CoarseNullSpace) { 4404 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4405 } 4406 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4407 /* solve coarse problem */ 4408 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4409 if (CoarseNullSpace) { 4410 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4411 } 4412 /* set eigenvalue estimation if preonly has not been requested */ 4413 if (compute_eigs) { 4414 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4415 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4416 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4417 lambda_max = eigs_r[neigs-1]; 4418 lambda_min = eigs_r[0]; 4419 if (pcbddc->use_coarse_estimates) { 4420 if (lambda_max>lambda_min) { 4421 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4422 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4423 } 4424 } 4425 } 4426 4427 /* check coarse problem residual error */ 4428 if (pcbddc->dbg_flag) { 4429 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4430 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4431 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4432 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4433 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4434 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4435 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4436 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4437 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4438 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4439 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4440 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4441 if (compute_eigs) { 4442 PetscReal lambda_max_s,lambda_min_s; 4443 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4444 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4445 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4446 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4447 for (i=0;i<neigs;i++) { 4448 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4449 } 4450 } 4451 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4452 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4453 } 4454 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4455 if (compute_eigs) { 4456 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4457 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4458 } 4459 } 4460 } 4461 /* print additional info */ 4462 if (pcbddc->dbg_flag) { 4463 /* waits until all processes reaches this point */ 4464 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4465 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4466 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4467 } 4468 4469 /* free memory */ 4470 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4471 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4472 PetscFunctionReturn(0); 4473 } 4474 4475 #undef __FUNCT__ 4476 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4477 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4478 { 4479 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4480 PC_IS* pcis = (PC_IS*)pc->data; 4481 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4482 PetscInt i,coarse_size=0; 4483 PetscInt *local_primal_indices=NULL; 4484 PetscErrorCode ierr; 4485 4486 PetscFunctionBegin; 4487 /* Compute global number of coarse dofs */ 4488 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4489 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4490 } 4491 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4492 4493 /* check numbering */ 4494 if (pcbddc->dbg_flag) { 4495 PetscScalar coarsesum,*array; 4496 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4497 4498 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4499 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4500 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4501 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4502 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4503 for (i=0;i<pcbddc->local_primal_size;i++) { 4504 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4505 } 4506 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4507 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4508 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4509 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4510 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4511 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4512 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4513 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4514 for (i=0;i<pcis->n;i++) { 4515 if (array[i] == 1.0) { 4516 set_error = PETSC_TRUE; 4517 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4518 } 4519 } 4520 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4521 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4522 for (i=0;i<pcis->n;i++) { 4523 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4524 } 4525 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4526 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4527 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4528 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4529 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4530 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4531 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4532 PetscInt *gidxs; 4533 4534 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4535 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4536 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4537 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4538 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4539 for (i=0;i<pcbddc->local_primal_size;i++) { 4540 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]); 4541 } 4542 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4543 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4544 } 4545 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4546 if (set_error_reduced) { 4547 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4548 } 4549 } 4550 /* get back data */ 4551 *coarse_size_n = coarse_size; 4552 *local_primal_indices_n = local_primal_indices; 4553 PetscFunctionReturn(0); 4554 } 4555 4556 #undef __FUNCT__ 4557 #define __FUNCT__ "PCBDDCGlobalToLocal" 4558 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4559 { 4560 IS localis_t; 4561 PetscInt i,lsize,*idxs,n; 4562 PetscScalar *vals; 4563 PetscErrorCode ierr; 4564 4565 PetscFunctionBegin; 4566 /* get indices in local ordering exploiting local to global map */ 4567 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4568 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4569 for (i=0;i<lsize;i++) vals[i] = 1.0; 4570 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4571 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4572 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4573 if (idxs) { /* multilevel guard */ 4574 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4575 } 4576 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4577 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4578 ierr = PetscFree(vals);CHKERRQ(ierr); 4579 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4580 /* now compute set in local ordering */ 4581 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4582 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4583 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4584 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4585 for (i=0,lsize=0;i<n;i++) { 4586 if (PetscRealPart(vals[i]) > 0.5) { 4587 lsize++; 4588 } 4589 } 4590 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4591 for (i=0,lsize=0;i<n;i++) { 4592 if (PetscRealPart(vals[i]) > 0.5) { 4593 idxs[lsize++] = i; 4594 } 4595 } 4596 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4597 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4598 *localis = localis_t; 4599 PetscFunctionReturn(0); 4600 } 4601 4602 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4603 #undef __FUNCT__ 4604 #define __FUNCT__ "PCBDDCMatMult_Private" 4605 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4606 { 4607 PCBDDCChange_ctx change_ctx; 4608 PetscErrorCode ierr; 4609 4610 PetscFunctionBegin; 4611 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4612 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4613 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4614 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4615 PetscFunctionReturn(0); 4616 } 4617 4618 #undef __FUNCT__ 4619 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4620 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4621 { 4622 PCBDDCChange_ctx change_ctx; 4623 PetscErrorCode ierr; 4624 4625 PetscFunctionBegin; 4626 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4627 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4628 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4629 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4630 PetscFunctionReturn(0); 4631 } 4632 4633 #undef __FUNCT__ 4634 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4635 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4636 { 4637 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4638 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4639 PetscInt *used_xadj,*used_adjncy; 4640 PetscBool free_used_adj; 4641 PetscErrorCode ierr; 4642 4643 PetscFunctionBegin; 4644 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4645 free_used_adj = PETSC_FALSE; 4646 if (pcbddc->sub_schurs_layers == -1) { 4647 used_xadj = NULL; 4648 used_adjncy = NULL; 4649 } else { 4650 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4651 used_xadj = pcbddc->mat_graph->xadj; 4652 used_adjncy = pcbddc->mat_graph->adjncy; 4653 } else if (pcbddc->computed_rowadj) { 4654 used_xadj = pcbddc->mat_graph->xadj; 4655 used_adjncy = pcbddc->mat_graph->adjncy; 4656 } else { 4657 PetscBool flg_row=PETSC_FALSE; 4658 const PetscInt *xadj,*adjncy; 4659 PetscInt nvtxs; 4660 4661 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4662 if (flg_row) { 4663 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4664 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4665 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4666 free_used_adj = PETSC_TRUE; 4667 } else { 4668 pcbddc->sub_schurs_layers = -1; 4669 used_xadj = NULL; 4670 used_adjncy = NULL; 4671 } 4672 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4673 } 4674 } 4675 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4676 4677 /* free adjacency */ 4678 if (free_used_adj) { 4679 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4680 } 4681 PetscFunctionReturn(0); 4682 } 4683 4684 #undef __FUNCT__ 4685 #define __FUNCT__ "PCBDDCInitSubSchurs" 4686 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4687 { 4688 PC_IS *pcis=(PC_IS*)pc->data; 4689 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4690 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4691 PCBDDCGraph graph; 4692 Mat S_j; 4693 PetscErrorCode ierr; 4694 4695 PetscFunctionBegin; 4696 /* attach interface graph for determining subsets */ 4697 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4698 IS verticesIS; 4699 4700 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4701 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4702 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4703 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4704 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4705 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4706 /* 4707 if (pcbddc->dbg_flag) { 4708 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4709 } 4710 */ 4711 } else { 4712 graph = pcbddc->mat_graph; 4713 } 4714 4715 /* Create Schur complement matrix */ 4716 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4717 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4718 4719 /* sub_schurs init */ 4720 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr); 4721 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4722 /* free graph struct */ 4723 if (pcbddc->sub_schurs_rebuild) { 4724 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4725 } 4726 PetscFunctionReturn(0); 4727 } 4728