1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs,maxneigs_r,cumsubs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscBool symmetric; 26 PetscErrorCode ierr; 27 28 PetscFunctionBegin; 29 if (!sub_schurs->use_mumps) { 30 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 31 } 32 ierr = MatIsSymmetric(sub_schurs->A,0.0,&symmetric);CHKERRQ(ierr); 33 if (!symmetric) { 34 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented"); 35 } 36 37 /* max size of subsets */ 38 mss = 0; 39 for (i=0;i<sub_schurs->n_subs;i++) { 40 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 41 PetscInt subset_size; 42 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 43 mss = PetscMax(mss,subset_size); 44 } 45 } 46 47 /* min/max and threshold */ 48 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 49 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 1; 50 nmax = PetscMax(nmin,nmax); 51 if (pcbddc->adaptive_threshold > 1.0) { 52 thresh = 1.0/pcbddc->adaptive_threshold; 53 } else { 54 thresh = 1.0; 55 } 56 57 /* allocate lapack workspace */ 58 cum = cum2 = 0; 59 maxneigs = 0; 60 for (i=0;i<sub_schurs->n_subs;i++) { 61 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 62 PetscInt n,subset_size; 63 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 64 n = PetscMin(subset_size,nmax); 65 n = PetscMax(n,nmin); 66 cum += subset_size*n; 67 cum2 += n; 68 maxneigs = PetscMax(maxneigs,n); 69 } 70 } 71 72 if (mss) { 73 if (symmetric) { 74 PetscBLASInt B_itype = 1; 75 PetscBLASInt B_N = mss; 76 PetscScalar zero = 0.0; 77 PetscScalar eps = 0.0; /* dlamch? */ 78 79 B_lwork = -1; 80 S = NULL; 81 St = NULL; 82 eigs = NULL; 83 eigv = NULL; 84 B_iwork = NULL; 85 B_ifail = NULL; 86 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 87 #if defined(PETSC_USE_COMPLEX) 88 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 89 #else 90 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 91 #endif 92 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 93 ierr = PetscFPTrapPop();CHKERRQ(ierr); 94 } else { 95 /* TODO */ 96 } 97 } else { 98 lwork = 0; 99 } 100 101 nv = 0; 102 if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */ 103 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 104 } 105 ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr); 106 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 107 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 108 #if defined(PETSC_USE_COMPLEX) 109 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 110 #endif 111 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 112 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 113 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 114 nv+cum,&pcbddc->adaptive_constraints_idxs, 115 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 116 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 117 118 maxneigs = 0; 119 cum = cum2 = cumarray = 0; 120 if (sub_schurs->is_Ej_com) { 121 const PetscInt *idxs; 122 123 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 124 for (cum=0;cum<nv;cum++) { 125 pcbddc->adaptive_constraints_n[cum] = 1; 126 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 127 pcbddc->adaptive_constraints_ptrs[cum] = cum; 128 pcbddc->adaptive_constraints_data[cum] = 1.0; 129 } 130 cum2 = cum; 131 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 132 } 133 134 if (mss) { /* multilevel */ 135 if (pcbddc->use_deluxe_scaling) { 136 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 137 } else { 138 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 139 } 140 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 141 } 142 143 cumsubs = 0; 144 for (i=0;i<sub_schurs->n_subs;i++) { 145 PetscInt j,subset_size; 146 147 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 148 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 149 const PetscInt *idxs; 150 PetscScalar one = 1.0,zero=0.0; 151 PetscBLASInt B_N; 152 153 /* S should be copied since we need it for deluxe scaling */ 154 if (symmetric) { 155 PetscInt j; 156 for (j=0;j<subset_size;j++) { 157 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 158 } 159 for (j=0;j<subset_size;j++) { 160 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 161 } 162 } else { 163 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 164 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 165 } 166 /* is always this the right matrix? */ 167 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 168 169 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 170 /* St = Starray+cumarray; */ 171 172 if (symmetric) { 173 PetscBLASInt B_itype = 1; 174 PetscBLASInt B_IL = 1, B_IU; 175 PetscScalar eps = -1.0; /* dlamch? */ 176 177 /* ask for eigenvalues lower than thresh */ 178 PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs); 179 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 180 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 181 #if defined(PETSC_USE_COMPLEX) 182 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 183 #else 184 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 185 #endif 186 ierr = PetscFPTrapPop();CHKERRQ(ierr); 187 if (B_ierr) { 188 if (B_ierr < 0 ) { 189 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 190 } else if (B_ierr <= B_N) { 191 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 192 } else { 193 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d principal minor not posdef",(int)B_ierr-B_N-1); 194 } 195 } 196 197 if (B_neigs > nmax) { 198 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, more than required %d.\n",PetscGlobalRank,B_neigs,nmax); 199 B_neigs = nmax; 200 } 201 202 if (B_neigs < nmin) { 203 PetscBLASInt B_neigs2; 204 205 B_IL = B_neigs + 1; 206 ierr = PetscBLASIntCast(nmin,&B_IU);CHKERRQ(ierr); 207 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, less than required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU); 208 if (symmetric) { 209 PetscInt j; 210 for (j=0;j<subset_size;j++) { 211 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 212 } 213 for (j=0;j<subset_size;j++) { 214 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 215 } 216 } else { 217 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 218 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 219 } 220 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 221 #if defined(PETSC_USE_COMPLEX) 222 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 223 #else 224 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 225 #endif 226 ierr = PetscFPTrapPop();CHKERRQ(ierr); 227 B_neigs += B_neigs2; 228 } 229 if (B_ierr) { 230 if (B_ierr < 0 ) { 231 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 232 } else if (B_ierr <= B_N) { 233 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 234 } else { 235 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d principal minor not posdef",(int)B_ierr-B_N-1); 236 } 237 } 238 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Got %d eigs\n",PetscGlobalRank,B_neigs); 239 for (j=0;j<B_neigs;j++) { 240 if (eigs[j] == 0.0) { 241 PetscPrintf(PETSC_COMM_SELF,"[%d] Inf\n",PetscGlobalRank); 242 } else { 243 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.6e\n",PetscGlobalRank,1.0/eigs[j]); 244 } 245 } 246 } else { 247 /* TODO */ 248 } 249 maxneigs = PetscMax(B_neigs,maxneigs); 250 pcbddc->adaptive_constraints_n[cumsubs+nv] = B_neigs; 251 cumsubs++; 252 253 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 254 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N)); 255 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 256 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 257 for (j=0;j<B_neigs;j++) { 258 #if 0 259 { 260 PetscBLASInt Blas_N,Blas_one = 1.0; 261 PetscScalar norm; 262 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 263 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 264 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 265 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 266 } else { 267 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 268 } 269 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 270 } 271 #endif 272 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 273 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 274 cum2 += subset_size; 275 } 276 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 277 } 278 /* shift for next computation */ 279 cumarray += subset_size*subset_size; 280 } 281 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 282 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 283 284 if (mss) { 285 if (pcbddc->use_deluxe_scaling) { 286 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 287 } else { 288 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 289 } 290 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 291 } 292 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 293 #if defined(PETSC_USE_COMPLEX) 294 ierr = PetscFree(rwork);CHKERRQ(ierr); 295 #endif 296 if (pcbddc->dbg_flag) { 297 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 298 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 299 } 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "PCBDDCSetUpSolvers" 305 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 306 { 307 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 308 PetscScalar *coarse_submat_vals; 309 PetscErrorCode ierr; 310 311 PetscFunctionBegin; 312 /* Setup local scatters R_to_B and (optionally) R_to_D */ 313 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 314 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 315 316 /* Setup local neumann solver ksp_R */ 317 /* PCBDDCSetUpLocalScatters should be called first! */ 318 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 319 320 /* Change global null space passed in by the user if change of basis has been requested */ 321 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 322 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 323 } 324 325 /* 326 Setup local correction and local part of coarse basis. 327 Gives back the dense local part of the coarse matrix in column major ordering 328 */ 329 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 330 331 /* Compute total number of coarse nodes and setup coarse solver */ 332 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 333 334 /* free */ 335 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 336 PetscFunctionReturn(0); 337 } 338 339 #undef __FUNCT__ 340 #define __FUNCT__ "PCBDDCResetCustomization" 341 PetscErrorCode PCBDDCResetCustomization(PC pc) 342 { 343 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 344 PetscErrorCode ierr; 345 346 PetscFunctionBegin; 347 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 348 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 349 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 350 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 351 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 352 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 353 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 354 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 355 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 356 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 357 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 358 PetscFunctionReturn(0); 359 } 360 361 #undef __FUNCT__ 362 #define __FUNCT__ "PCBDDCResetTopography" 363 PetscErrorCode PCBDDCResetTopography(PC pc) 364 { 365 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 366 PetscErrorCode ierr; 367 368 PetscFunctionBegin; 369 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 370 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 371 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 372 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 373 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 374 PetscFunctionReturn(0); 375 } 376 377 #undef __FUNCT__ 378 #define __FUNCT__ "PCBDDCResetSolvers" 379 PetscErrorCode PCBDDCResetSolvers(PC pc) 380 { 381 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 382 PetscScalar *array; 383 PetscErrorCode ierr; 384 385 PetscFunctionBegin; 386 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 387 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 388 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 389 ierr = PetscFree(array);CHKERRQ(ierr); 390 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 391 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 392 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 393 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 394 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 395 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 396 if (pcbddc->local_auxmat2) { 397 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 398 ierr = PetscFree(array);CHKERRQ(ierr); 399 } 400 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 401 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 402 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 403 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 404 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 405 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 406 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 407 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 408 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 409 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 410 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 411 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 412 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 413 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 414 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 415 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNCT__ 420 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 421 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 422 { 423 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 424 PC_IS *pcis = (PC_IS*)pc->data; 425 VecType impVecType; 426 PetscInt n_constraints,n_R,old_size; 427 PetscErrorCode ierr; 428 429 PetscFunctionBegin; 430 if (!pcbddc->ConstraintMatrix) { 431 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 432 } 433 /* get sizes */ 434 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 435 n_R = pcis->n-pcbddc->n_actual_vertices; 436 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 437 /* local work vectors (try to avoid unneeded work)*/ 438 /* R nodes */ 439 old_size = -1; 440 if (pcbddc->vec1_R) { 441 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 442 } 443 if (n_R != old_size) { 444 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 445 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 446 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 447 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 448 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 449 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 450 } 451 /* local primal dofs */ 452 old_size = -1; 453 if (pcbddc->vec1_P) { 454 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 455 } 456 if (pcbddc->local_primal_size != old_size) { 457 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 458 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 459 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 460 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 461 } 462 /* local explicit constraints */ 463 old_size = -1; 464 if (pcbddc->vec1_C) { 465 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 466 } 467 if (n_constraints && n_constraints != old_size) { 468 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 469 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 470 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 471 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 472 } 473 PetscFunctionReturn(0); 474 } 475 476 #undef __FUNCT__ 477 #define __FUNCT__ "PCBDDCSetUpCorrection" 478 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 479 { 480 PetscErrorCode ierr; 481 /* pointers to pcis and pcbddc */ 482 PC_IS* pcis = (PC_IS*)pc->data; 483 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 484 /* submatrices of local problem */ 485 Mat A_RV,A_VR,A_VV; 486 /* submatrices of local coarse problem */ 487 Mat S_VV,S_CV,S_VC,S_CC; 488 /* working matrices */ 489 Mat C_CR; 490 /* additional working stuff */ 491 PC pc_R; 492 Mat F; 493 PetscBool isLU,isCHOL,isILU; 494 495 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 496 PetscScalar *work; 497 PetscInt *idx_V_B; 498 PetscInt n,n_vertices,n_constraints; 499 PetscInt i,n_R,n_D,n_B; 500 PetscBool unsymmetric_check; 501 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 502 MatType impMatType; 503 /* some shortcuts to scalars */ 504 PetscScalar one=1.0,m_one=-1.0; 505 506 PetscFunctionBegin; 507 /* get number of vertices (corners plus constraints with change of basis) 508 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 509 n_vertices = pcbddc->n_actual_vertices; 510 n_constraints = pcbddc->local_primal_size-n_vertices; 511 /* Set Non-overlapping dimensions */ 512 n_B = pcis->n_B; n_D = pcis->n - n_B; 513 n_R = pcis->n-n_vertices; 514 515 /* Set types for local objects needed by BDDC precondtioner */ 516 impMatType = MATSEQDENSE; 517 518 /* vertices in boundary numbering */ 519 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 520 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 521 if (i != n_vertices) { 522 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 523 } 524 525 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 526 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 527 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 528 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 529 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 530 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 531 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 532 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 533 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 534 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 535 536 unsymmetric_check = PETSC_FALSE; 537 /* allocate workspace */ 538 n = 0; 539 if (n_constraints) { 540 n += n_R*n_constraints; 541 } 542 if (n_vertices) { 543 n = PetscMax(2*n_R*n_vertices,n); 544 } 545 if (!pcbddc->issym) { 546 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 547 unsymmetric_check = PETSC_TRUE; 548 } 549 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 550 551 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 552 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 553 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 554 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 555 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 556 if (isLU || isILU || isCHOL) { 557 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 558 } else { 559 F = NULL; 560 } 561 562 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 563 if (n_constraints) { 564 Mat M1,M2,M3; 565 IS is_aux; 566 /* see if we can save some allocations */ 567 if (pcbddc->local_auxmat2) { 568 PetscInt on_R,on_constraints; 569 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 570 if (on_R != n_R || on_constraints != n_constraints) { 571 PetscScalar *marray; 572 573 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 574 ierr = PetscFree(marray);CHKERRQ(ierr); 575 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 576 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 577 } 578 } 579 /* auxiliary matrices */ 580 if (!pcbddc->local_auxmat2) { 581 PetscScalar *marray; 582 583 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 584 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 585 marray += n_R*n_constraints; 586 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 587 } 588 589 /* Extract constraints on R nodes: C_{CR} */ 590 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 591 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 592 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 593 594 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 595 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 596 for (i=0;i<n_constraints;i++) { 597 const PetscScalar *row_cmat_values; 598 const PetscInt *row_cmat_indices; 599 PetscInt size_of_constraint,j; 600 601 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 602 for (j=0;j<size_of_constraint;j++) { 603 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 604 } 605 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 606 } 607 if (F) { 608 Mat B; 609 610 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 611 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 612 ierr = MatDestroy(&B);CHKERRQ(ierr); 613 } else { 614 PetscScalar *xarray; 615 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 616 for (i=0;i<n_constraints;i++) { 617 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 618 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 619 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 620 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 621 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 622 } 623 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 624 } 625 626 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 627 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 628 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 629 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 630 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 631 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 632 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 633 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 634 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 635 ierr = MatDestroy(&M2);CHKERRQ(ierr); 636 ierr = MatDestroy(&M3);CHKERRQ(ierr); 637 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 638 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 639 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 640 ierr = MatDestroy(&M1);CHKERRQ(ierr); 641 } 642 643 /* Get submatrices from subdomain matrix */ 644 if (n_vertices) { 645 Mat newmat; 646 IS is_aux; 647 PetscInt ibs,mbs; 648 PetscBool issbaij; 649 650 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 651 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 652 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 653 if (ibs != mbs) { /* need to convert to SEQAIJ */ 654 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 655 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 656 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 657 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 658 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 659 } else { 660 /* this is safe */ 661 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 662 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 663 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 664 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 665 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 666 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 667 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 668 } else { 669 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 670 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 671 } 672 } 673 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 674 } 675 676 /* Matrix of coarse basis functions (local) */ 677 if (pcbddc->coarse_phi_B) { 678 PetscInt on_B,on_primal,on_D=n_D; 679 if (pcbddc->coarse_phi_D) { 680 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 681 } 682 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 683 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 684 PetscScalar *marray; 685 686 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 687 ierr = PetscFree(marray);CHKERRQ(ierr); 688 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 689 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 690 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 691 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 692 } 693 } 694 695 if (!pcbddc->coarse_phi_B) { 696 PetscScalar *marray; 697 698 n = n_B*pcbddc->local_primal_size; 699 if (pcbddc->switch_static || pcbddc->dbg_flag) { 700 n += n_D*pcbddc->local_primal_size; 701 } 702 if (!pcbddc->issym) { 703 n *= 2; 704 } 705 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 706 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 707 n = n_B*pcbddc->local_primal_size; 708 if (pcbddc->switch_static || pcbddc->dbg_flag) { 709 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 710 n += n_D*pcbddc->local_primal_size; 711 } 712 if (!pcbddc->issym) { 713 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 714 if (pcbddc->switch_static) { 715 n = n_B*pcbddc->local_primal_size; 716 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 717 } 718 } else { 719 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 720 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 721 if (pcbddc->switch_static) { 722 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 723 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 724 } 725 } 726 } 727 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 728 /* vertices */ 729 if (n_vertices) { 730 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 731 PetscScalar *x,*y; 732 PetscBLASInt B_N,B_one = 1; 733 734 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 735 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 736 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 737 if (F) { 738 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 739 } else { 740 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 741 for (i=0;i<n_vertices;i++) { 742 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 743 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 744 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 745 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 746 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 747 } 748 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 749 } 750 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 751 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 752 if (n_constraints) { 753 Mat B; 754 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 755 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 756 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 757 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 758 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 759 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 760 ierr = MatDestroy(&B);CHKERRQ(ierr); 761 } 762 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 763 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 764 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 765 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 766 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 767 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 768 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 769 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 770 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 771 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 772 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 773 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 774 /* coarse basis functions */ 775 for (i=0;i<n_vertices;i++) { 776 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 777 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 778 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 779 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 780 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 781 y[n_B*i+idx_V_B[i]] = 1.0; 782 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 783 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 784 785 if (pcbddc->switch_static || pcbddc->dbg_flag) { 786 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 787 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 788 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 789 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 790 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 791 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 792 } 793 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 794 } 795 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 796 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 797 } 798 799 if (n_constraints) { 800 Mat B; 801 802 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 803 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 804 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 805 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 806 if (n_vertices) { 807 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 808 } 809 ierr = MatDestroy(&B);CHKERRQ(ierr); 810 /* coarse basis functions */ 811 for (i=0;i<n_constraints;i++) { 812 PetscScalar *y; 813 814 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 815 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 816 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 817 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 818 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 819 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 820 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 821 822 if (pcbddc->switch_static || pcbddc->dbg_flag) { 823 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 824 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 825 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 826 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 827 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 828 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 829 } 830 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 831 } 832 } 833 834 /* compute other basis functions for non-symmetric problems */ 835 if (!pcbddc->issym) { 836 Mat B,X; 837 838 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 839 840 if (n_constraints) { 841 Mat S_VCT,S_CCT,B_C; 842 843 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 844 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 845 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 846 if (n_vertices) { 847 Mat B_V; 848 849 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 850 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 851 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 852 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 853 } 854 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 855 } 856 if (n_vertices) { 857 Mat A_VRT; 858 PetscBLASInt B_N,B_one = 1; 859 860 if (!n_constraints) { /* if there are no constraints, reset work */ 861 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 862 } 863 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 864 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 865 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 866 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 867 } 868 869 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 870 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 871 for (i=0;i<pcbddc->local_primal_size;i++) { 872 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 873 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 874 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 875 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 876 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 877 } 878 } else { 879 for (i=0;i<pcbddc->local_primal_size;i++) { 880 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 881 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 882 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 883 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 884 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 885 } 886 } 887 ierr = MatDestroy(&B);CHKERRQ(ierr); 888 /* coarse basis functions */ 889 for (i=0;i<pcbddc->local_primal_size;i++) { 890 PetscScalar *y; 891 892 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 893 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 894 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 895 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 897 if (i<n_vertices) { 898 y[n_B*i+idx_V_B[i]] = 1.0; 899 } 900 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 901 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 902 903 if (pcbddc->switch_static || pcbddc->dbg_flag) { 904 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 905 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 906 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 907 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 908 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 909 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 910 } 911 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 912 } 913 ierr = MatDestroy(&X);CHKERRQ(ierr); 914 } 915 916 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 917 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 918 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 919 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 920 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 921 /* Checking coarse_sub_mat and coarse basis functios */ 922 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 923 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 924 if (pcbddc->dbg_flag) { 925 Mat coarse_sub_mat; 926 Mat AUXMAT,TM1,TM2,TM3,TM4; 927 Mat coarse_phi_D,coarse_phi_B; 928 Mat coarse_psi_D,coarse_psi_B; 929 Mat A_II,A_BB,A_IB,A_BI; 930 MatType checkmattype=MATSEQAIJ; 931 PetscReal real_value; 932 933 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 934 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 935 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 936 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 937 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 938 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 939 if (unsymmetric_check) { 940 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 941 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 942 } 943 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 944 945 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 946 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 947 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 948 if (unsymmetric_check) { 949 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 950 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 951 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 952 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 953 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 954 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 955 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 956 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 957 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 958 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 959 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 960 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 961 } else { 962 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 963 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 964 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 965 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 966 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 967 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 968 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 969 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 970 } 971 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 972 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 973 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 974 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 975 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 976 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 977 978 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 979 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 980 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 981 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 982 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 983 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 984 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 985 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 986 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 987 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 988 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 989 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 990 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 991 if (unsymmetric_check) { 992 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 993 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 994 } 995 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 996 } 997 998 /* free memory */ 999 ierr = PetscFree(work);CHKERRQ(ierr); 1000 if (n_vertices) { 1001 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1002 } 1003 if (n_constraints) { 1004 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1005 } 1006 /* get back data */ 1007 *coarse_submat_vals_n = coarse_submat_vals; 1008 PetscFunctionReturn(0); 1009 } 1010 1011 #undef __FUNCT__ 1012 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1013 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B) 1014 { 1015 Mat *work_mat; 1016 IS isrow_s,iscol_s; 1017 PetscBool rsorted,csorted; 1018 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1019 PetscErrorCode ierr; 1020 1021 PetscFunctionBegin; 1022 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1023 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1024 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1025 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1026 1027 if (!rsorted) { 1028 const PetscInt *idxs; 1029 PetscInt *idxs_sorted,i; 1030 1031 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1032 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1033 for (i=0;i<rsize;i++) { 1034 idxs_perm_r[i] = i; 1035 } 1036 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1037 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1038 for (i=0;i<rsize;i++) { 1039 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1040 } 1041 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1042 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1043 } else { 1044 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1045 isrow_s = isrow; 1046 } 1047 1048 if (!csorted) { 1049 if (isrow == iscol) { 1050 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1051 iscol_s = isrow_s; 1052 } else { 1053 const PetscInt *idxs; 1054 PetscInt *idxs_sorted,i; 1055 1056 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1057 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1058 for (i=0;i<csize;i++) { 1059 idxs_perm_c[i] = i; 1060 } 1061 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1062 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1063 for (i=0;i<csize;i++) { 1064 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1065 } 1066 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1067 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1068 } 1069 } else { 1070 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1071 iscol_s = iscol; 1072 } 1073 1074 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr); 1075 1076 if (!rsorted || !csorted) { 1077 Mat new_mat; 1078 IS is_perm_r,is_perm_c; 1079 1080 if (!rsorted) { 1081 PetscInt *idxs_r,i; 1082 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1083 for (i=0;i<rsize;i++) { 1084 idxs_r[idxs_perm_r[i]] = i; 1085 } 1086 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1087 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1088 } else { 1089 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1090 } 1091 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1092 1093 if (!csorted) { 1094 if (isrow_s == iscol_s) { 1095 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1096 is_perm_c = is_perm_r; 1097 } else { 1098 PetscInt *idxs_c,i; 1099 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1100 for (i=0;i<csize;i++) { 1101 idxs_c[idxs_perm_c[i]] = i; 1102 } 1103 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1104 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1105 } 1106 } else { 1107 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1108 } 1109 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1110 1111 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1112 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1113 work_mat[0] = new_mat; 1114 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1115 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1116 } 1117 1118 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1119 *B = work_mat[0]; 1120 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1121 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1122 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1123 PetscFunctionReturn(0); 1124 } 1125 1126 #undef __FUNCT__ 1127 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1128 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1129 { 1130 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1131 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1132 Mat new_mat; 1133 IS is_local,is_global; 1134 PetscInt local_size; 1135 PetscBool isseqaij; 1136 PetscErrorCode ierr; 1137 1138 PetscFunctionBegin; 1139 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1140 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1141 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1142 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1143 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1144 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr); 1145 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1146 1147 /* check */ 1148 if (pcbddc->dbg_flag) { 1149 Vec x,x_change; 1150 PetscReal error; 1151 1152 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1153 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1154 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1155 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1156 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1157 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1158 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1159 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1160 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1161 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1162 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1163 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1164 ierr = VecDestroy(&x);CHKERRQ(ierr); 1165 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1166 } 1167 1168 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1169 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1170 if (isseqaij) { 1171 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1172 } else { 1173 Mat work_mat; 1174 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1175 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1176 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1177 } 1178 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1179 /* 1180 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1181 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1182 */ 1183 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1189 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1190 { 1191 PC_IS* pcis = (PC_IS*)(pc->data); 1192 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1193 IS is_aux1,is_aux2; 1194 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1195 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1196 PetscInt vbs,bs; 1197 PetscBT bitmask; 1198 PetscErrorCode ierr; 1199 1200 PetscFunctionBegin; 1201 /* 1202 No need to setup local scatters if 1203 - primal space is unchanged 1204 AND 1205 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1206 AND 1207 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1208 */ 1209 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1210 PetscFunctionReturn(0); 1211 } 1212 /* destroy old objects */ 1213 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1214 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1215 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1216 /* Set Non-overlapping dimensions */ 1217 n_B = pcis->n_B; n_D = pcis->n - n_B; 1218 n_vertices = pcbddc->n_actual_vertices; 1219 /* create auxiliary bitmask */ 1220 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1221 for (i=0;i<n_vertices;i++) { 1222 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1223 } 1224 1225 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1226 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1227 for (i=0, n_R=0; i<pcis->n; i++) { 1228 if (!PetscBTLookup(bitmask,i)) { 1229 idx_R_local[n_R] = i; 1230 n_R++; 1231 } 1232 } 1233 1234 /* Block code */ 1235 vbs = 1; 1236 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1237 if (bs>1 && !(n_vertices%bs)) { 1238 PetscBool is_blocked = PETSC_TRUE; 1239 PetscInt *vary; 1240 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1241 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1242 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1243 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1244 for (i=0; i<n_vertices; i++) { 1245 if (vary[i]!=0 && vary[i]!=bs) { 1246 is_blocked = PETSC_FALSE; 1247 break; 1248 } 1249 } 1250 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1251 vbs = bs; 1252 for (i=0;i<n_R/vbs;i++) { 1253 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1254 } 1255 } 1256 ierr = PetscFree(vary);CHKERRQ(ierr); 1257 } 1258 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1259 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1260 1261 /* print some info if requested */ 1262 if (pcbddc->dbg_flag) { 1263 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1264 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1265 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1266 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1267 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1268 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1269 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1270 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1271 } 1272 1273 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1274 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1275 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1276 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1277 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1278 for (i=0; i<n_D; i++) { 1279 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1280 } 1281 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1282 for (i=0, j=0; i<n_R; i++) { 1283 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1284 aux_array1[j++] = i; 1285 } 1286 } 1287 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1288 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1289 for (i=0, j=0; i<n_B; i++) { 1290 if (!PetscBTLookup(bitmask,is_indices[i])) { 1291 aux_array2[j++] = i; 1292 } 1293 } 1294 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1295 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1296 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1297 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1298 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1299 1300 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1301 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1302 for (i=0, j=0; i<n_R; i++) { 1303 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1304 aux_array1[j++] = i; 1305 } 1306 } 1307 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1308 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1309 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1310 } 1311 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1312 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1313 PetscFunctionReturn(0); 1314 } 1315 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1319 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1320 { 1321 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1322 PC_IS *pcis = (PC_IS*)pc->data; 1323 PC pc_temp; 1324 Mat A_RR; 1325 MatReuse reuse; 1326 PetscScalar m_one = -1.0; 1327 PetscReal value; 1328 PetscInt n_D,n_R,ibs,mbs; 1329 PetscBool use_exact,use_exact_reduced,issbaij; 1330 PetscErrorCode ierr; 1331 /* prefixes stuff */ 1332 char dir_prefix[256],neu_prefix[256],str_level[16]; 1333 size_t len; 1334 1335 PetscFunctionBegin; 1336 1337 /* compute prefixes */ 1338 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1339 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1340 if (!pcbddc->current_level) { 1341 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1342 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1343 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1344 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1345 } else { 1346 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1347 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1348 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1349 len -= 15; /* remove "pc_bddc_coarse_" */ 1350 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1351 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1352 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1353 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1354 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1355 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1356 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1357 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1358 } 1359 1360 /* DIRICHLET PROBLEM */ 1361 if (dirichlet) { 1362 /* Matrix for Dirichlet problem is pcis->A_II */ 1363 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1364 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1365 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1366 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1367 /* default */ 1368 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1369 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1370 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1371 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1372 if (issbaij) { 1373 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1374 } else { 1375 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1376 } 1377 /* Allow user's customization */ 1378 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1379 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1380 } 1381 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1382 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1383 if (!n_D) { 1384 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1385 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1386 } 1387 /* Set Up KSP for Dirichlet problem of BDDC */ 1388 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1389 /* set ksp_D into pcis data */ 1390 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1391 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1392 pcis->ksp_D = pcbddc->ksp_D; 1393 } 1394 1395 /* NEUMANN PROBLEM */ 1396 A_RR = 0; 1397 if (neumann) { 1398 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1399 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1400 if (pcbddc->ksp_R) { /* already created ksp */ 1401 PetscInt nn_R; 1402 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1403 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1404 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1405 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1406 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1407 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1408 reuse = MAT_INITIAL_MATRIX; 1409 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1410 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1411 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1412 reuse = MAT_INITIAL_MATRIX; 1413 } else { /* safe to reuse the matrix */ 1414 reuse = MAT_REUSE_MATRIX; 1415 } 1416 } 1417 /* last check */ 1418 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1419 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1420 reuse = MAT_INITIAL_MATRIX; 1421 } 1422 } else { /* first time, so we need to create the matrix */ 1423 reuse = MAT_INITIAL_MATRIX; 1424 } 1425 /* extract A_RR */ 1426 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1427 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1428 if (ibs != mbs) { 1429 Mat newmat; 1430 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1431 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1432 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1433 } else { 1434 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1435 } 1436 if (!pcbddc->ksp_R) { /* create object if not present */ 1437 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1438 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1439 /* default */ 1440 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1441 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1442 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1443 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1444 if (issbaij) { 1445 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1446 } else { 1447 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1448 } 1449 /* Allow user's customization */ 1450 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1451 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1452 } 1453 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1454 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1455 if (!n_R) { 1456 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1457 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1458 } 1459 /* Set Up KSP for Neumann problem of BDDC */ 1460 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1461 } 1462 1463 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1464 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1465 if (pcbddc->dbg_flag) { 1466 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1467 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1468 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1469 } 1470 if (dirichlet) { /* Dirichlet */ 1471 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1472 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1473 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1474 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1475 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1476 /* need to be adapted? */ 1477 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1478 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1479 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1480 /* print info */ 1481 if (pcbddc->dbg_flag) { 1482 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1483 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1484 } 1485 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1486 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1487 } 1488 } 1489 if (neumann) { /* Neumann */ 1490 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1491 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1492 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1493 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1494 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1495 /* need to be adapted? */ 1496 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1497 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1498 /* print info */ 1499 if (pcbddc->dbg_flag) { 1500 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1501 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1502 } 1503 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1504 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1505 } 1506 } 1507 } 1508 /* free Neumann problem's matrix */ 1509 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1510 PetscFunctionReturn(0); 1511 } 1512 1513 #undef __FUNCT__ 1514 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1515 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1516 { 1517 PetscErrorCode ierr; 1518 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1519 1520 PetscFunctionBegin; 1521 if (applytranspose) { 1522 if (pcbddc->local_auxmat1) { 1523 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1524 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1525 } 1526 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1527 } else { 1528 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1529 if (pcbddc->local_auxmat1) { 1530 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1531 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1532 } 1533 } 1534 PetscFunctionReturn(0); 1535 } 1536 1537 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1540 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1541 { 1542 PetscErrorCode ierr; 1543 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1544 PC_IS* pcis = (PC_IS*) (pc->data); 1545 const PetscScalar zero = 0.0; 1546 1547 PetscFunctionBegin; 1548 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1549 if (applytranspose) { 1550 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1551 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1552 } else { 1553 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1554 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1555 } 1556 /* start communications from local primal nodes to rhs of coarse solver */ 1557 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1558 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1559 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1560 1561 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1562 /* TODO remove null space when doing multilevel */ 1563 if (pcbddc->coarse_ksp) { 1564 if (applytranspose) { 1565 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1566 } else { 1567 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1568 } 1569 } 1570 1571 /* Local solution on R nodes */ 1572 if (pcis->n) { 1573 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1574 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1575 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1576 if (pcbddc->switch_static) { 1577 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1578 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1579 } 1580 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1581 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1582 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1583 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1584 if (pcbddc->switch_static) { 1585 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1586 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1587 } 1588 } 1589 1590 /* communications from coarse sol to local primal nodes */ 1591 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1592 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1593 1594 /* Sum contributions from two levels */ 1595 if (applytranspose) { 1596 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1597 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1598 } else { 1599 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1600 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1601 } 1602 PetscFunctionReturn(0); 1603 } 1604 1605 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1606 #undef __FUNCT__ 1607 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1608 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1609 { 1610 PetscErrorCode ierr; 1611 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1612 PetscScalar *array,*array2; 1613 Vec from,to; 1614 1615 PetscFunctionBegin; 1616 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1617 from = pcbddc->coarse_vec; 1618 to = pcbddc->vec1_P; 1619 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1620 Vec tvec; 1621 PetscInt lsize; 1622 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1623 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1624 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1625 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1626 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1627 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1628 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1629 } 1630 } else { /* from local to global -> put data in coarse right hand side */ 1631 from = pcbddc->vec1_P; 1632 to = pcbddc->coarse_vec; 1633 } 1634 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1635 PetscFunctionReturn(0); 1636 } 1637 1638 #undef __FUNCT__ 1639 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1640 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1641 { 1642 PetscErrorCode ierr; 1643 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1644 PetscScalar *array,*array2; 1645 Vec from,to; 1646 1647 PetscFunctionBegin; 1648 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1649 from = pcbddc->coarse_vec; 1650 to = pcbddc->vec1_P; 1651 } else { /* from local to global -> put data in coarse right hand side */ 1652 from = pcbddc->vec1_P; 1653 to = pcbddc->coarse_vec; 1654 } 1655 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1656 if (smode == SCATTER_FORWARD) { 1657 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1658 Vec tvec; 1659 PetscInt lsize; 1660 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1661 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1662 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1663 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1664 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1665 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1666 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1667 } 1668 } 1669 PetscFunctionReturn(0); 1670 } 1671 1672 /* uncomment for testing purposes */ 1673 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1676 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1677 { 1678 PetscErrorCode ierr; 1679 PC_IS* pcis = (PC_IS*)(pc->data); 1680 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1681 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1682 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1683 MatType impMatType=MATSEQAIJ; 1684 /* one and zero */ 1685 PetscScalar one=1.0,zero=0.0; 1686 /* space to store constraints and their local indices */ 1687 PetscScalar *temp_quadrature_constraint; 1688 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1689 /* iterators */ 1690 PetscInt i,j,k,total_counts,temp_start_ptr; 1691 /* stuff to store connected components stored in pcbddc->mat_graph */ 1692 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1693 PetscInt n_ISForFaces,n_ISForEdges; 1694 /* near null space stuff */ 1695 MatNullSpace nearnullsp; 1696 const Vec *nearnullvecs; 1697 Vec *localnearnullsp; 1698 PetscBool nnsp_has_cnst; 1699 PetscInt nnsp_size; 1700 PetscScalar *array; 1701 /* BLAS integers */ 1702 PetscBLASInt lwork,lierr; 1703 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1704 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1705 /* LAPACK working arrays for SVD or POD */ 1706 PetscBool skip_lapack; 1707 PetscScalar *work; 1708 PetscReal *singular_vals; 1709 #if defined(PETSC_USE_COMPLEX) 1710 PetscReal *rwork; 1711 #endif 1712 #if defined(PETSC_MISSING_LAPACK_GESVD) 1713 PetscBLASInt Blas_one_2=1; 1714 PetscScalar *temp_basis,*correlation_mat; 1715 #else 1716 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1717 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1718 #endif 1719 /* reuse */ 1720 PetscInt olocal_primal_size; 1721 PetscInt *oprimal_indices_local_idxs; 1722 /* change of basis */ 1723 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1724 PetscBool boolforchange,qr_needed; 1725 PetscBT touched,change_basis,qr_needed_idx; 1726 /* auxiliary stuff */ 1727 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1728 PetscInt ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL; 1729 PetscScalar *temp_quadrature_constraint_work=NULL; 1730 /* some quantities */ 1731 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1732 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1733 1734 1735 PetscFunctionBegin; 1736 /* Destroy Mat objects computed previously */ 1737 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1738 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1739 /* TODO synch with adaptive selection */ 1740 /* Get index sets for faces, edges and vertices from graph */ 1741 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1742 /* free unneeded index sets */ 1743 if (!pcbddc->use_vertices) { 1744 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1745 } 1746 if (!pcbddc->use_edges) { 1747 for (i=0;i<n_ISForEdges;i++) { 1748 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1749 } 1750 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1751 n_ISForEdges = 0; 1752 } 1753 if (!pcbddc->use_faces) { 1754 for (i=0;i<n_ISForFaces;i++) { 1755 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1756 } 1757 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1758 n_ISForFaces = 0; 1759 } 1760 /* HACKS (the following two blocks of code) */ 1761 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1762 pcbddc->use_change_of_basis = PETSC_TRUE; 1763 if (!ISForEdges) { 1764 pcbddc->use_change_on_faces = PETSC_TRUE; 1765 } 1766 } 1767 if (pcbddc->NullSpace) { 1768 /* use_change_of_basis should be consistent among processors */ 1769 PetscBool tbool[2],gbool[2]; 1770 tbool [0] = pcbddc->use_change_of_basis; 1771 tbool [1] = pcbddc->use_change_on_faces; 1772 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1773 pcbddc->use_change_of_basis = gbool[0]; 1774 pcbddc->use_change_on_faces = gbool[1]; 1775 } 1776 /* print some info */ 1777 if (pcbddc->dbg_flag) { 1778 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1779 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1780 i = 0; 1781 if (ISForVertices) { 1782 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1783 } 1784 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1785 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1786 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1787 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1788 } 1789 1790 if (!pcbddc->adaptive_selection) { 1791 /* check if near null space is attached to global mat */ 1792 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1793 if (nearnullsp) { 1794 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1795 /* remove any stored info */ 1796 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1797 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1798 /* store information for BDDC solver reuse */ 1799 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1800 pcbddc->onearnullspace = nearnullsp; 1801 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1802 for (i=0;i<nnsp_size;i++) { 1803 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1804 } 1805 } else { /* if near null space is not provided BDDC uses constants by default */ 1806 nnsp_size = 0; 1807 nnsp_has_cnst = PETSC_TRUE; 1808 } 1809 /* get max number of constraints on a single cc */ 1810 max_constraints = nnsp_size; 1811 if (nnsp_has_cnst) max_constraints++; 1812 1813 /* 1814 Evaluate maximum storage size needed by the procedure 1815 - temp_indices will contain start index of each constraint stored as follows 1816 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1817 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1818 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1819 */ 1820 total_counts = n_ISForFaces+n_ISForEdges; 1821 total_counts *= max_constraints; 1822 n_vertices = 0; 1823 if (ISForVertices) { 1824 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1825 } 1826 total_counts += n_vertices; 1827 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1828 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1829 total_counts = 0; 1830 max_size_of_constraint = 0; 1831 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1832 if (i<n_ISForEdges) { 1833 used_IS = &ISForEdges[i]; 1834 } else { 1835 used_IS = &ISForFaces[i-n_ISForEdges]; 1836 } 1837 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1838 total_counts += j; 1839 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1840 } 1841 total_counts *= max_constraints; 1842 total_counts += n_vertices; 1843 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1844 /* get local part of global near null space vectors */ 1845 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1846 for (k=0;k<nnsp_size;k++) { 1847 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1848 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1849 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1850 } 1851 1852 /* whether or not to skip lapack calls */ 1853 skip_lapack = PETSC_TRUE; 1854 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1855 1856 /* allocate some auxiliary stuff */ 1857 if (!skip_lapack || pcbddc->use_qr_single) { 1858 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1859 } 1860 1861 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1862 if (!skip_lapack) { 1863 PetscScalar temp_work; 1864 1865 #if defined(PETSC_MISSING_LAPACK_GESVD) 1866 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1867 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1868 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1869 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1870 #if defined(PETSC_USE_COMPLEX) 1871 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1872 #endif 1873 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1874 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1875 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1876 lwork = -1; 1877 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1878 #if !defined(PETSC_USE_COMPLEX) 1879 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1880 #else 1881 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1882 #endif 1883 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1884 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1885 #else /* on missing GESVD */ 1886 /* SVD */ 1887 PetscInt max_n,min_n; 1888 max_n = max_size_of_constraint; 1889 min_n = max_constraints; 1890 if (max_size_of_constraint < max_constraints) { 1891 min_n = max_size_of_constraint; 1892 max_n = max_constraints; 1893 } 1894 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1895 #if defined(PETSC_USE_COMPLEX) 1896 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1897 #endif 1898 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1899 lwork = -1; 1900 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1901 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1902 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1903 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1904 #if !defined(PETSC_USE_COMPLEX) 1905 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1906 #else 1907 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1908 #endif 1909 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1910 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1911 #endif /* on missing GESVD */ 1912 /* Allocate optimal workspace */ 1913 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1914 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1915 } 1916 /* Now we can loop on constraining sets */ 1917 total_counts = 0; 1918 temp_indices[0] = 0; 1919 /* vertices */ 1920 if (ISForVertices) { 1921 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1922 if (nnsp_has_cnst) { /* consider all vertices */ 1923 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1924 for (i=0;i<n_vertices;i++) { 1925 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1926 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1927 total_counts++; 1928 } 1929 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1930 PetscBool used_vertex; 1931 for (i=0;i<n_vertices;i++) { 1932 used_vertex = PETSC_FALSE; 1933 k = 0; 1934 while (!used_vertex && k<nnsp_size) { 1935 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1936 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1937 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1938 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1939 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1940 total_counts++; 1941 used_vertex = PETSC_TRUE; 1942 } 1943 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1944 k++; 1945 } 1946 } 1947 } 1948 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1949 n_vertices = total_counts; 1950 } 1951 1952 /* edges and faces */ 1953 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1954 if (ncc<n_ISForEdges) { 1955 used_IS = &ISForEdges[ncc]; 1956 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1957 } else { 1958 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1959 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1960 } 1961 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1962 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1963 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1964 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1965 /* change of basis should not be performed on local periodic nodes */ 1966 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1967 if (nnsp_has_cnst) { 1968 PetscScalar quad_value; 1969 temp_constraints++; 1970 if (!pcbddc->use_nnsp_true) { 1971 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1972 } else { 1973 quad_value = 1.0; 1974 } 1975 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1976 for (j=0;j<size_of_constraint;j++) { 1977 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1978 } 1979 /* sort by global ordering if using lapack subroutines (not needed!) */ 1980 if (!skip_lapack || pcbddc->use_qr_single) { 1981 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1982 for (j=0;j<size_of_constraint;j++) { 1983 permutation[j]=j; 1984 } 1985 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1986 for (j=0;j<size_of_constraint;j++) { 1987 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 1988 } 1989 for (j=0;j<size_of_constraint;j++) { 1990 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1991 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1992 } 1993 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1994 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1995 } 1996 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1997 total_counts++; 1998 } 1999 for (k=0;k<nnsp_size;k++) { 2000 PetscReal real_value; 2001 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2002 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2003 for (j=0;j<size_of_constraint;j++) { 2004 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2005 } 2006 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2007 /* check if array is null on the connected component */ 2008 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2009 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2010 if (real_value > 0.0) { /* keep indices and values */ 2011 /* sort by global ordering if using lapack subroutines */ 2012 if (!skip_lapack || pcbddc->use_qr_single) { 2013 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2014 for (j=0;j<size_of_constraint;j++) { 2015 permutation[j]=j; 2016 } 2017 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2018 for (j=0;j<size_of_constraint;j++) { 2019 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2020 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2021 } 2022 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2023 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2024 } 2025 temp_constraints++; 2026 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2027 total_counts++; 2028 } 2029 } 2030 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2031 valid_constraints = temp_constraints; 2032 if (!pcbddc->use_nnsp_true && temp_constraints) { 2033 if (temp_constraints == 1) { /* just normalize the constraint */ 2034 PetscScalar norm; 2035 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2036 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2037 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2038 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2039 } else { /* perform SVD */ 2040 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2041 2042 #if defined(PETSC_MISSING_LAPACK_GESVD) 2043 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2044 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2045 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2046 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2047 from that computed using LAPACKgesvd 2048 -> This is due to a different computation of eigenvectors in LAPACKheev 2049 -> The quality of the POD-computed basis will be the same */ 2050 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2051 /* Store upper triangular part of correlation matrix */ 2052 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2053 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2054 for (j=0;j<temp_constraints;j++) { 2055 for (k=0;k<j+1;k++) { 2056 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 2057 } 2058 } 2059 /* compute eigenvalues and eigenvectors of correlation matrix */ 2060 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2061 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2062 #if !defined(PETSC_USE_COMPLEX) 2063 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2064 #else 2065 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2066 #endif 2067 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2068 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2069 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2070 j = 0; 2071 while (j < temp_constraints && singular_vals[j] < tol) j++; 2072 total_counts = total_counts-j; 2073 valid_constraints = temp_constraints-j; 2074 /* scale and copy POD basis into used quadrature memory */ 2075 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2076 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2077 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2078 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2079 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2080 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2081 if (j<temp_constraints) { 2082 PetscInt ii; 2083 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2084 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2085 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2086 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2087 for (k=0;k<temp_constraints-j;k++) { 2088 for (ii=0;ii<size_of_constraint;ii++) { 2089 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2090 } 2091 } 2092 } 2093 #else /* on missing GESVD */ 2094 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2095 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2096 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2097 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2098 #if !defined(PETSC_USE_COMPLEX) 2099 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 2100 #else 2101 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 2102 #endif 2103 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2104 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2105 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2106 k = temp_constraints; 2107 if (k > size_of_constraint) k = size_of_constraint; 2108 j = 0; 2109 while (j < k && singular_vals[k-j-1] < tol) j++; 2110 valid_constraints = k-j; 2111 total_counts = total_counts-temp_constraints+valid_constraints; 2112 #endif /* on missing GESVD */ 2113 } 2114 } 2115 /* setting change_of_basis flag is safe now */ 2116 if (boolforchange) { 2117 for (j=0;j<valid_constraints;j++) { 2118 PetscBTSet(change_basis,total_counts-j-1); 2119 } 2120 } 2121 } 2122 /* free workspace */ 2123 if (!skip_lapack || pcbddc->use_qr_single) { 2124 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2125 } 2126 if (!skip_lapack) { 2127 ierr = PetscFree(work);CHKERRQ(ierr); 2128 #if defined(PETSC_USE_COMPLEX) 2129 ierr = PetscFree(rwork);CHKERRQ(ierr); 2130 #endif 2131 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2132 #if defined(PETSC_MISSING_LAPACK_GESVD) 2133 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2134 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2135 #endif 2136 } 2137 for (k=0;k<nnsp_size;k++) { 2138 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2139 } 2140 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2141 } else { 2142 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2143 PetscInt cum = 0; 2144 2145 total_counts = 0; 2146 n_vertices = 0; 2147 if (sub_schurs->is_Ej_com) { 2148 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2149 } 2150 max_constraints = 0; 2151 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2152 total_counts += pcbddc->adaptive_constraints_n[i]; 2153 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2154 } 2155 temp_indices = pcbddc->adaptive_constraints_ptrs; 2156 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2157 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2158 2159 #if 0 2160 printf("Found %d totals\n",total_counts); 2161 for (i=0;i<total_counts;i++) { 2162 printf("const %d, start %d",i,temp_indices[i]); 2163 printf(" end %d:\n",temp_indices[i+1]); 2164 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2165 printf(" idxs %d",temp_indices_to_constraint[j]); 2166 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2167 } 2168 } 2169 #endif 2170 2171 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2172 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2173 /* Change of basis */ 2174 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2175 if (pcbddc->use_change_of_basis) { 2176 cum = n_vertices; 2177 for (i=0;i<sub_schurs->n_subs;i++) { 2178 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2179 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2180 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2181 } 2182 } 2183 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2184 } 2185 } 2186 } 2187 2188 /* free index sets of faces, edges and vertices */ 2189 for (i=0;i<n_ISForFaces;i++) { 2190 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2191 } 2192 if (n_ISForFaces) { 2193 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2194 } 2195 for (i=0;i<n_ISForEdges;i++) { 2196 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2197 } 2198 if (n_ISForEdges) { 2199 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2200 } 2201 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2202 2203 /* map temp_indices_to_constraint in boundary numbering */ 2204 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2205 if (i != temp_indices[total_counts]) { 2206 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2207 } 2208 2209 /* set quantities in pcbddc data structure and store previous primal size */ 2210 /* n_vertices defines the number of subdomain corners in the primal space */ 2211 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2212 olocal_primal_size = pcbddc->local_primal_size; 2213 pcbddc->local_primal_size = total_counts; 2214 pcbddc->n_vertices = n_vertices; 2215 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2216 2217 /* Create constraint matrix */ 2218 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2219 /* so we need to set it up properly either with or without change of basis */ 2220 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2221 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2222 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2223 /* array to compute a local numbering of constraints : vertices first then constraints */ 2224 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2225 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2226 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2227 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2228 /* auxiliary stuff for basis change */ 2229 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2230 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2231 2232 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2233 total_primal_vertices=0; 2234 for (i=0;i<pcbddc->local_primal_size;i++) { 2235 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2236 if (size_of_constraint == 1) { 2237 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2238 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2239 aux_primal_minloc[total_primal_vertices]=0; 2240 total_primal_vertices++; 2241 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2242 PetscInt min_loc,min_index; 2243 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2244 /* find first untouched local node */ 2245 k = 0; 2246 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2247 min_index = global_indices[k]; 2248 min_loc = k; 2249 /* search the minimum among global nodes already untouched on the cc */ 2250 for (k=1;k<size_of_constraint;k++) { 2251 /* there can be more than one constraint on a single connected component */ 2252 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2253 min_index = global_indices[k]; 2254 min_loc = k; 2255 } 2256 } 2257 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2258 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2259 aux_primal_minloc[total_primal_vertices]=min_loc; 2260 total_primal_vertices++; 2261 } 2262 } 2263 /* determine if a QR strategy is needed for change of basis */ 2264 qr_needed = PETSC_FALSE; 2265 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2266 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2267 if (PetscBTLookup(change_basis,i)) { 2268 if (!pcbddc->use_qr_single) { 2269 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2270 j = 0; 2271 for (k=0;k<size_of_constraint;k++) { 2272 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2273 j++; 2274 } 2275 } 2276 /* found more than one primal dof on the cc */ 2277 if (j > 1) { 2278 PetscBTSet(qr_needed_idx,i); 2279 qr_needed = PETSC_TRUE; 2280 } 2281 } else { 2282 PetscBTSet(qr_needed_idx,i); 2283 qr_needed = PETSC_TRUE; 2284 } 2285 } 2286 } 2287 /* free workspace */ 2288 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2289 2290 /* permute indices in order to have a sorted set of vertices */ 2291 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2292 2293 /* nonzero structure of constraint matrix */ 2294 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2295 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2296 j=total_primal_vertices; 2297 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2298 if (!PetscBTLookup(change_basis,i)) { 2299 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2300 j++; 2301 } 2302 } 2303 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2304 ierr = PetscFree(nnz);CHKERRQ(ierr); 2305 /* set values in constraint matrix */ 2306 for (i=0;i<total_primal_vertices;i++) { 2307 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2308 } 2309 total_counts = total_primal_vertices; 2310 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2311 if (!PetscBTLookup(change_basis,i)) { 2312 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2313 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2314 total_counts++; 2315 } 2316 } 2317 /* assembling */ 2318 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2319 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2320 /* 2321 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2322 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2323 */ 2324 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2325 if (pcbddc->use_change_of_basis) { 2326 /* dual and primal dofs on a single cc */ 2327 PetscInt dual_dofs,primal_dofs; 2328 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2329 PetscInt primal_counter; 2330 /* working stuff for GEQRF */ 2331 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2332 PetscBLASInt lqr_work; 2333 /* working stuff for UNGQR */ 2334 PetscScalar *gqr_work,lgqr_work_t; 2335 PetscBLASInt lgqr_work; 2336 /* working stuff for TRTRS */ 2337 PetscScalar *trs_rhs; 2338 PetscBLASInt Blas_NRHS; 2339 /* pointers for values insertion into change of basis matrix */ 2340 PetscInt *start_rows,*start_cols; 2341 PetscScalar *start_vals; 2342 /* working stuff for values insertion */ 2343 PetscBT is_primal; 2344 /* matrix sizes */ 2345 PetscInt global_size,local_size; 2346 /* temporary change of basis */ 2347 Mat localChangeOfBasisMatrix; 2348 2349 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2350 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2351 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2352 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2353 2354 /* nonzeros for local mat */ 2355 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2356 for (i=0;i<pcis->n;i++) nnz[i]=1; 2357 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2358 if (PetscBTLookup(change_basis,i)) { 2359 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2360 if (PetscBTLookup(qr_needed_idx,i)) { 2361 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2362 } else { 2363 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2; 2364 /* get local primal index on the cc */ 2365 j = 0; 2366 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2367 nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2368 } 2369 } 2370 } 2371 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2372 ierr = PetscFree(nnz);CHKERRQ(ierr); 2373 /* Set initial identity in the matrix */ 2374 for (i=0;i<pcis->n;i++) { 2375 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2376 } 2377 2378 if (pcbddc->dbg_flag) { 2379 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2380 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2381 } 2382 2383 2384 /* Now we loop on the constraints which need a change of basis */ 2385 /* 2386 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2387 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2388 2389 Basic blocks of change of basis matrix T computed by 2390 2391 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2392 2393 | 1 0 ... 0 s_1/S | 2394 | 0 1 ... 0 s_2/S | 2395 | ... | 2396 | 0 ... 1 s_{n-1}/S | 2397 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2398 2399 with S = \sum_{i=1}^n s_i^2 2400 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2401 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2402 2403 - QR decomposition of constraints otherwise 2404 */ 2405 if (qr_needed) { 2406 /* space to store Q */ 2407 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2408 /* first we issue queries for optimal work */ 2409 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2410 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2411 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2412 lqr_work = -1; 2413 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2414 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2415 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2416 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2417 lgqr_work = -1; 2418 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2419 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2420 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2421 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2422 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2423 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2424 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2425 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2426 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2427 /* array to store scaling factors for reflectors */ 2428 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2429 /* array to store rhs and solution of triangular solver */ 2430 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2431 /* allocating workspace for check */ 2432 if (pcbddc->dbg_flag) { 2433 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2434 } 2435 } 2436 /* array to store whether a node is primal or not */ 2437 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2438 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2439 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2440 if (i != total_primal_vertices) { 2441 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2442 } 2443 for (i=0;i<total_primal_vertices;i++) { 2444 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2445 } 2446 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2447 2448 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2449 /* -> using implicit ordering contained in temp_indices data */ 2450 total_counts = pcbddc->n_vertices; 2451 primal_counter = total_counts; 2452 while (total_counts<pcbddc->local_primal_size) { 2453 primal_dofs = 1; 2454 if (PetscBTLookup(change_basis,total_counts)) { 2455 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2456 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) { 2457 primal_dofs++; 2458 } 2459 /* get constraint info */ 2460 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2461 dual_dofs = size_of_constraint-primal_dofs; 2462 2463 if (pcbddc->dbg_flag) { 2464 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2465 } 2466 2467 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2468 2469 /* copy quadrature constraints for change of basis check */ 2470 if (pcbddc->dbg_flag) { 2471 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2472 } 2473 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2474 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2475 2476 /* compute QR decomposition of constraints */ 2477 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2478 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2479 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2480 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2481 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2482 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2483 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2484 2485 /* explictly compute R^-T */ 2486 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2487 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2488 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2489 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2490 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2491 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2492 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2493 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2494 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2495 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2496 2497 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2498 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2499 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2500 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2501 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2502 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2503 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2504 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2505 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2506 2507 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2508 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2509 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2510 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2511 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2512 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2513 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2514 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2515 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2516 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2517 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2518 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2519 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2520 2521 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2522 start_rows = &temp_indices_to_constraint[temp_indices[total_counts]]; 2523 /* insert cols for primal dofs */ 2524 for (j=0;j<primal_dofs;j++) { 2525 start_vals = &qr_basis[j*size_of_constraint]; 2526 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2527 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2528 } 2529 /* insert cols for dual dofs */ 2530 for (j=0,k=0;j<dual_dofs;k++) { 2531 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2532 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2533 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k]; 2534 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2535 j++; 2536 } 2537 } 2538 2539 /* check change of basis */ 2540 if (pcbddc->dbg_flag) { 2541 PetscInt ii,jj; 2542 PetscBool valid_qr=PETSC_TRUE; 2543 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2544 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2545 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2546 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2547 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2548 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2549 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2550 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2551 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2552 for (jj=0;jj<size_of_constraint;jj++) { 2553 for (ii=0;ii<primal_dofs;ii++) { 2554 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2555 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2556 } 2557 } 2558 if (!valid_qr) { 2559 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2560 for (jj=0;jj<size_of_constraint;jj++) { 2561 for (ii=0;ii<primal_dofs;ii++) { 2562 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2563 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2564 } 2565 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2566 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2567 } 2568 } 2569 } 2570 } else { 2571 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2572 } 2573 } 2574 } else { /* simple transformation block */ 2575 PetscInt row,col; 2576 PetscScalar val,norm; 2577 2578 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2579 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2580 for (j=0;j<size_of_constraint;j++) { 2581 PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2582 row = temp_indices_to_constraint[temp_indices[total_counts]+j]; 2583 if (!PetscBTLookup(is_primal,row_B)) { 2584 col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2585 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2586 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2587 } else { 2588 for (k=0;k<size_of_constraint;k++) { 2589 col = temp_indices_to_constraint[temp_indices[total_counts]+k]; 2590 if (row != col) { 2591 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2592 } else { 2593 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2594 } 2595 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2596 } 2597 } 2598 } 2599 if (pcbddc->dbg_flag) { 2600 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2601 } 2602 } 2603 /* increment primal counter */ 2604 primal_counter += primal_dofs; 2605 } else { 2606 if (pcbddc->dbg_flag) { 2607 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2608 } 2609 } 2610 /* increment constraint counter total_counts */ 2611 total_counts += primal_dofs; 2612 } 2613 2614 /* free workspace */ 2615 if (qr_needed) { 2616 if (pcbddc->dbg_flag) { 2617 ierr = PetscFree(work);CHKERRQ(ierr); 2618 } 2619 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2620 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2621 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2622 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2623 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2624 } 2625 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2626 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2627 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2628 2629 /* assembling of global change of variable */ 2630 { 2631 Mat tmat; 2632 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2633 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2634 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2635 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2636 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2637 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2638 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2639 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2640 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2641 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2642 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2643 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2644 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2645 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2646 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2647 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2648 } 2649 /* check */ 2650 if (pcbddc->dbg_flag) { 2651 PetscReal error; 2652 Vec x,x_change; 2653 2654 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2655 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2656 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2657 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2658 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2659 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2660 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2661 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2662 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2663 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2664 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2665 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2666 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2667 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2668 ierr = VecDestroy(&x);CHKERRQ(ierr); 2669 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2670 } 2671 2672 /* adapt sub_schurs computed (if any) */ 2673 if (pcbddc->use_deluxe_scaling) { 2674 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2675 if (sub_schurs->n_subs_par_g) { 2676 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented"); 2677 } 2678 if (sub_schurs->S_Ej_all) { 2679 Mat S_1,S_2,tmat; 2680 IS is_all_N; 2681 2682 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2683 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2684 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2685 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2686 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2687 sub_schurs->S_Ej_all = S_1; 2688 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2689 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2690 sub_schurs->sum_S_Ej_all = S_2; 2691 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2692 } 2693 } 2694 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2695 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2696 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2697 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2698 } 2699 2700 /* set up change of basis context */ 2701 if (pcbddc->ChangeOfBasisMatrix) { 2702 PCBDDCChange_ctx change_ctx; 2703 2704 if (!pcbddc->new_global_mat) { 2705 PetscInt global_size,local_size; 2706 2707 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2708 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2709 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2710 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2711 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2712 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2713 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2714 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2715 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2716 } else { 2717 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2718 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2719 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2720 } 2721 if (!pcbddc->user_ChangeOfBasisMatrix) { 2722 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2723 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2724 } else { 2725 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2726 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2727 } 2728 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2729 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2730 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2731 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2732 } 2733 2734 /* get indices in local ordering for vertices and constraints */ 2735 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2736 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2737 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2738 } 2739 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2740 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2741 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2742 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2743 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2744 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2745 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2746 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2747 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2748 /* set quantities in PCBDDC data struct */ 2749 pcbddc->n_actual_vertices = i; 2750 /* check if a new primal space has been introduced */ 2751 pcbddc->new_primal_space_local = PETSC_TRUE; 2752 if (olocal_primal_size == pcbddc->local_primal_size) { 2753 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2754 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2755 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2756 } 2757 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2758 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2759 2760 /* flush dbg viewer */ 2761 if (pcbddc->dbg_flag) { 2762 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2763 } 2764 2765 /* free workspace */ 2766 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2767 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2768 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2769 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2770 if (!pcbddc->adaptive_selection) { 2771 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2772 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2773 } else { 2774 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2775 pcbddc->adaptive_constraints_ptrs, 2776 pcbddc->adaptive_constraints_idxs, 2777 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2778 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2779 } 2780 PetscFunctionReturn(0); 2781 } 2782 2783 #undef __FUNCT__ 2784 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2785 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2786 { 2787 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2788 PC_IS *pcis = (PC_IS*)pc->data; 2789 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2790 PetscInt ierr,i,vertex_size; 2791 PetscViewer viewer=pcbddc->dbg_viewer; 2792 2793 PetscFunctionBegin; 2794 /* Reset previously computed graph */ 2795 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2796 /* Init local Graph struct */ 2797 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2798 2799 /* Check validity of the csr graph passed in by the user */ 2800 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2801 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2802 } 2803 2804 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2805 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2806 Mat mat_adj; 2807 PetscInt *xadj,*adjncy; 2808 PetscInt nvtxs; 2809 PetscBool flg_row=PETSC_TRUE; 2810 2811 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2812 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2813 if (!flg_row) { 2814 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2815 } 2816 if (pcbddc->use_local_adj) { 2817 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2818 pcbddc->computed_rowadj = PETSC_TRUE; 2819 } else { /* just compute subdomain's connected components */ 2820 IS is_dummy; 2821 ISLocalToGlobalMapping l2gmap_dummy; 2822 PetscInt j,sum; 2823 PetscInt *cxadj,*cadjncy; 2824 const PetscInt *idxs; 2825 PCBDDCGraph graph; 2826 PetscBT is_on_boundary; 2827 2828 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2829 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2830 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2831 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2832 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2833 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2834 graph->xadj = xadj; 2835 graph->adjncy = adjncy; 2836 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2837 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2838 2839 if (pcbddc->dbg_flag) { 2840 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2841 for (i=0;i<graph->ncc;i++) { 2842 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2843 } 2844 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2845 } 2846 2847 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2848 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2849 for (i=0;i<pcis->n_B;i++) { 2850 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2851 } 2852 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2853 2854 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2855 sum = 0; 2856 for (i=0;i<graph->ncc;i++) { 2857 PetscInt sizecc = 0; 2858 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2859 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2860 sizecc++; 2861 } 2862 } 2863 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2864 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2865 cxadj[graph->queue[j]] = sizecc; 2866 } 2867 } 2868 sum += sizecc*sizecc; 2869 } 2870 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2871 sum = 0; 2872 for (i=0;i<nvtxs;i++) { 2873 PetscInt temp = cxadj[i]; 2874 cxadj[i] = sum; 2875 sum += temp; 2876 } 2877 cxadj[nvtxs] = sum; 2878 for (i=0;i<graph->ncc;i++) { 2879 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2880 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2881 PetscInt k,sizecc = 0; 2882 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2883 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2884 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2885 sizecc++; 2886 } 2887 } 2888 } 2889 } 2890 } 2891 if (nvtxs) { 2892 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2893 } else { 2894 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2895 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2896 } 2897 graph->xadj = 0; 2898 graph->adjncy = 0; 2899 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2900 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2901 } 2902 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2903 if (!flg_row) { 2904 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2905 } 2906 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2907 } 2908 2909 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2910 vertex_size = 1; 2911 if (pcbddc->user_provided_isfordofs) { 2912 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2913 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2914 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2915 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2916 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2917 } 2918 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2919 pcbddc->n_ISForDofs = 0; 2920 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2921 } 2922 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2923 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2924 } else { 2925 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2926 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2927 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2928 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2929 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2930 } 2931 } 2932 } 2933 2934 /* Setup of Graph */ 2935 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2936 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2937 } 2938 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2939 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2940 } 2941 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2942 2943 /* Graph's connected components analysis */ 2944 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2945 2946 /* print some info to stdout */ 2947 if (pcbddc->dbg_flag) { 2948 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2949 } 2950 2951 /* mark topography has done */ 2952 pcbddc->recompute_topography = PETSC_FALSE; 2953 PetscFunctionReturn(0); 2954 } 2955 2956 #undef __FUNCT__ 2957 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2958 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2959 { 2960 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2961 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2962 PetscErrorCode ierr; 2963 2964 PetscFunctionBegin; 2965 n = 0; 2966 vertices = 0; 2967 if (pcbddc->ConstraintMatrix) { 2968 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2969 for (i=0;i<local_primal_size;i++) { 2970 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2971 if (size_of_constraint == 1) n++; 2972 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2973 } 2974 if (vertices_idx) { 2975 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2976 n = 0; 2977 for (i=0;i<local_primal_size;i++) { 2978 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2979 if (size_of_constraint == 1) { 2980 vertices[n++]=row_cmat_indices[0]; 2981 } 2982 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2983 } 2984 } 2985 } 2986 *n_vertices = n; 2987 if (vertices_idx) *vertices_idx = vertices; 2988 PetscFunctionReturn(0); 2989 } 2990 2991 #undef __FUNCT__ 2992 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2993 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2994 { 2995 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2996 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2997 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2998 PetscBT touched; 2999 PetscErrorCode ierr; 3000 3001 /* This function assumes that the number of local constraints per connected component 3002 is not greater than the number of nodes defined for the connected component 3003 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3004 PetscFunctionBegin; 3005 n = 0; 3006 constraints_index = 0; 3007 if (pcbddc->ConstraintMatrix) { 3008 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3009 max_size_of_constraint = 0; 3010 for (i=0;i<local_primal_size;i++) { 3011 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3012 if (size_of_constraint > 1) { 3013 n++; 3014 } 3015 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3016 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3017 } 3018 if (constraints_idx) { 3019 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3020 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3021 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3022 n = 0; 3023 for (i=0;i<local_primal_size;i++) { 3024 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3025 if (size_of_constraint > 1) { 3026 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3027 /* find first untouched local node */ 3028 j = 0; 3029 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3030 min_index = row_cmat_global_indices[j]; 3031 min_loc = j; 3032 /* search the minimum among nodes not yet touched on the connected component 3033 since there can be more than one constraint on a single cc */ 3034 for (j=1;j<size_of_constraint;j++) { 3035 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3036 min_index = row_cmat_global_indices[j]; 3037 min_loc = j; 3038 } 3039 } 3040 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3041 constraints_index[n++] = row_cmat_indices[min_loc]; 3042 } 3043 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3044 } 3045 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3046 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3047 } 3048 } 3049 *n_constraints = n; 3050 if (constraints_idx) *constraints_idx = constraints_index; 3051 PetscFunctionReturn(0); 3052 } 3053 3054 #undef __FUNCT__ 3055 #define __FUNCT__ "PCBDDCSubsetNumbering" 3056 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3057 { 3058 Vec local_vec,global_vec; 3059 IS seqis,paris; 3060 VecScatter scatter_ctx; 3061 PetscScalar *array; 3062 PetscInt *temp_global_dofs; 3063 PetscScalar globalsum; 3064 PetscInt i,j,s; 3065 PetscInt nlocals,first_index,old_index,max_local; 3066 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 3067 PetscMPIInt *dof_sizes,*dof_displs; 3068 PetscBool first_found; 3069 PetscErrorCode ierr; 3070 3071 PetscFunctionBegin; 3072 /* mpi buffers */ 3073 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3074 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3075 j = ( !rank_prec_comm ? size_prec_comm : 0); 3076 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 3077 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 3078 /* get maximum size of subset */ 3079 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3080 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3081 max_local = 0; 3082 for (i=0;i<n_local_dofs;i++) { 3083 if (max_local < temp_global_dofs[i] ) { 3084 max_local = temp_global_dofs[i]; 3085 } 3086 } 3087 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3088 max_global++; 3089 max_local = 0; 3090 for (i=0;i<n_local_dofs;i++) { 3091 if (max_local < local_dofs[i] ) { 3092 max_local = local_dofs[i]; 3093 } 3094 } 3095 max_local++; 3096 /* allocate workspace */ 3097 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3098 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3099 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3100 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3101 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3102 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3103 /* create scatter */ 3104 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3105 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3106 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3107 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3108 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3109 /* init array */ 3110 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3111 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3112 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3113 if (local_dofs_mult) { 3114 for (i=0;i<n_local_dofs;i++) { 3115 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3116 } 3117 } else { 3118 for (i=0;i<n_local_dofs;i++) { 3119 array[local_dofs[i]]=1.0; 3120 } 3121 } 3122 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3123 /* scatter into global vec and get total number of global dofs */ 3124 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3125 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3126 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3127 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3128 /* Fill global_vec with cumulative function for global numbering */ 3129 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3130 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3131 nlocals = 0; 3132 first_index = -1; 3133 first_found = PETSC_FALSE; 3134 for (i=0;i<s;i++) { 3135 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3136 first_found = PETSC_TRUE; 3137 first_index = i; 3138 } 3139 nlocals += (PetscInt)PetscRealPart(array[i]); 3140 } 3141 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3142 if (!rank_prec_comm) { 3143 dof_displs[0]=0; 3144 for (i=1;i<size_prec_comm;i++) { 3145 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3146 } 3147 } 3148 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3149 if (first_found) { 3150 array[first_index] += (PetscScalar)nlocals; 3151 old_index = first_index; 3152 for (i=first_index+1;i<s;i++) { 3153 if (PetscRealPart(array[i]) > 0.1) { 3154 array[i] += array[old_index]; 3155 old_index = i; 3156 } 3157 } 3158 } 3159 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3160 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3161 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3162 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3163 /* get global ordering of local dofs */ 3164 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3165 if (local_dofs_mult) { 3166 for (i=0;i<n_local_dofs;i++) { 3167 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3168 } 3169 } else { 3170 for (i=0;i<n_local_dofs;i++) { 3171 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3172 } 3173 } 3174 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3175 /* free workspace */ 3176 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3177 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3178 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3179 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3180 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3181 /* return pointer to global ordering of local dofs */ 3182 *global_numbering_subset = temp_global_dofs; 3183 PetscFunctionReturn(0); 3184 } 3185 3186 #undef __FUNCT__ 3187 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3188 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3189 { 3190 PetscInt i,j; 3191 PetscScalar *alphas; 3192 PetscErrorCode ierr; 3193 3194 PetscFunctionBegin; 3195 /* this implements stabilized Gram-Schmidt */ 3196 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3197 for (i=0;i<n;i++) { 3198 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3199 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3200 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3201 } 3202 ierr = PetscFree(alphas);CHKERRQ(ierr); 3203 PetscFunctionReturn(0); 3204 } 3205 3206 #undef __FUNCT__ 3207 #define __FUNCT__ "MatISGetSubassemblingPattern" 3208 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3209 { 3210 Mat subdomain_adj; 3211 IS new_ranks,ranks_send_to; 3212 MatPartitioning partitioner; 3213 Mat_IS *matis; 3214 PetscInt n_neighs,*neighs,*n_shared,**shared; 3215 PetscInt prank; 3216 PetscMPIInt size,rank,color; 3217 PetscInt *xadj,*adjncy,*oldranks; 3218 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3219 PetscInt i,local_size,threshold=0; 3220 PetscErrorCode ierr; 3221 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3222 PetscSubcomm subcomm; 3223 3224 PetscFunctionBegin; 3225 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3226 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3227 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3228 3229 /* Get info on mapping */ 3230 matis = (Mat_IS*)(mat->data); 3231 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3232 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3233 3234 /* build local CSR graph of subdomains' connectivity */ 3235 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3236 xadj[0] = 0; 3237 xadj[1] = PetscMax(n_neighs-1,0); 3238 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3239 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3240 3241 if (threshold) { 3242 PetscInt xadj_count = 0; 3243 for (i=1;i<n_neighs;i++) { 3244 if (n_shared[i] > threshold) { 3245 adjncy[xadj_count] = neighs[i]; 3246 adjncy_wgt[xadj_count] = n_shared[i]; 3247 xadj_count++; 3248 } 3249 } 3250 xadj[1] = xadj_count; 3251 } else { 3252 if (xadj[1]) { 3253 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3254 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3255 } 3256 } 3257 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3258 if (use_square) { 3259 for (i=0;i<xadj[1];i++) { 3260 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3261 } 3262 } 3263 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3264 3265 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3266 3267 /* 3268 Restrict work on active processes only. 3269 */ 3270 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3271 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3272 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3273 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3274 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3275 if (color) { 3276 ierr = PetscFree(xadj);CHKERRQ(ierr); 3277 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3278 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3279 } else { 3280 PetscInt coarsening_ratio; 3281 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 3282 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3283 prank = rank; 3284 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 3285 /* 3286 for (i=0;i<size;i++) { 3287 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3288 } 3289 */ 3290 for (i=0;i<xadj[1];i++) { 3291 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3292 } 3293 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3294 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3295 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3296 3297 /* Partition */ 3298 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 3299 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3300 if (use_vwgt) { 3301 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3302 v_wgt[0] = local_size; 3303 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3304 } 3305 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3306 coarsening_ratio = size/n_subdomains; 3307 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3308 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3309 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3310 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3311 3312 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3313 if (contiguous) { 3314 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3315 } else { 3316 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3317 } 3318 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3319 /* clean up */ 3320 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3321 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3322 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3323 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3324 } 3325 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3326 3327 /* assemble parallel IS for sends */ 3328 i = 1; 3329 if (color) i=0; 3330 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3331 3332 /* get back IS */ 3333 *is_sends = ranks_send_to; 3334 PetscFunctionReturn(0); 3335 } 3336 3337 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3338 3339 #undef __FUNCT__ 3340 #define __FUNCT__ "MatISSubassemble" 3341 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3342 { 3343 Mat local_mat; 3344 Mat_IS *matis; 3345 IS is_sends_internal; 3346 PetscInt rows,cols,new_local_rows; 3347 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3348 PetscBool ismatis,isdense,newisdense,destroy_mat; 3349 ISLocalToGlobalMapping l2gmap; 3350 PetscInt* l2gmap_indices; 3351 const PetscInt* is_indices; 3352 MatType new_local_type; 3353 /* buffers */ 3354 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3355 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3356 PetscInt *recv_buffer_idxs_local; 3357 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3358 /* MPI */ 3359 MPI_Comm comm,comm_n; 3360 PetscSubcomm subcomm; 3361 PetscMPIInt n_sends,n_recvs,commsize; 3362 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3363 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3364 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3365 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3366 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3367 PetscErrorCode ierr; 3368 3369 PetscFunctionBegin; 3370 /* TODO: add missing checks */ 3371 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3372 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3373 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3374 PetscValidLogicalCollectiveInt(mat,nis,7); 3375 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3376 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3377 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3378 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3379 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3380 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3381 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3382 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3383 PetscInt mrows,mcols,mnrows,mncols; 3384 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3385 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3386 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3387 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3388 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3389 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3390 } 3391 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3392 PetscValidLogicalCollectiveInt(mat,bs,0); 3393 /* prepare IS for sending if not provided */ 3394 if (!is_sends) { 3395 PetscBool pcontig = PETSC_TRUE; 3396 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3397 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3398 } else { 3399 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3400 is_sends_internal = is_sends; 3401 } 3402 3403 /* get pointer of MATIS data */ 3404 matis = (Mat_IS*)mat->data; 3405 3406 /* get comm */ 3407 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3408 3409 /* compute number of sends */ 3410 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3411 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3412 3413 /* compute number of receives */ 3414 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3415 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3416 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3417 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3418 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3419 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3420 ierr = PetscFree(iflags);CHKERRQ(ierr); 3421 3422 /* restrict comm if requested */ 3423 subcomm = 0; 3424 destroy_mat = PETSC_FALSE; 3425 if (restrict_comm) { 3426 PetscMPIInt color,rank,subcommsize; 3427 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3428 color = 0; 3429 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3430 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3431 subcommsize = commsize - subcommsize; 3432 /* check if reuse has been requested */ 3433 if (reuse == MAT_REUSE_MATRIX) { 3434 if (*mat_n) { 3435 PetscMPIInt subcommsize2; 3436 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3437 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3438 comm_n = PetscObjectComm((PetscObject)*mat_n); 3439 } else { 3440 comm_n = PETSC_COMM_SELF; 3441 } 3442 } else { /* MAT_INITIAL_MATRIX */ 3443 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3444 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3445 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3446 comm_n = subcomm->comm; 3447 } 3448 /* flag to destroy *mat_n if not significative */ 3449 if (color) destroy_mat = PETSC_TRUE; 3450 } else { 3451 comm_n = comm; 3452 } 3453 3454 /* prepare send/receive buffers */ 3455 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3456 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3457 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3458 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3459 if (nis) { 3460 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3461 } 3462 3463 /* Get data from local matrices */ 3464 if (!isdense) { 3465 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3466 /* TODO: See below some guidelines on how to prepare the local buffers */ 3467 /* 3468 send_buffer_vals should contain the raw values of the local matrix 3469 send_buffer_idxs should contain: 3470 - MatType_PRIVATE type 3471 - PetscInt size_of_l2gmap 3472 - PetscInt global_row_indices[size_of_l2gmap] 3473 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3474 */ 3475 } else { 3476 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3477 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3478 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3479 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3480 send_buffer_idxs[1] = i; 3481 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3482 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3483 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3484 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3485 for (i=0;i<n_sends;i++) { 3486 ilengths_vals[is_indices[i]] = len*len; 3487 ilengths_idxs[is_indices[i]] = len+2; 3488 } 3489 } 3490 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3491 /* additional is (if any) */ 3492 if (nis) { 3493 PetscMPIInt psum; 3494 PetscInt j; 3495 for (j=0,psum=0;j<nis;j++) { 3496 PetscInt plen; 3497 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3498 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3499 psum += len+1; /* indices + lenght */ 3500 } 3501 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3502 for (j=0,psum=0;j<nis;j++) { 3503 PetscInt plen; 3504 const PetscInt *is_array_idxs; 3505 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3506 send_buffer_idxs_is[psum] = plen; 3507 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3508 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3509 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3510 psum += plen+1; /* indices + lenght */ 3511 } 3512 for (i=0;i<n_sends;i++) { 3513 ilengths_idxs_is[is_indices[i]] = psum; 3514 } 3515 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3516 } 3517 3518 buf_size_idxs = 0; 3519 buf_size_vals = 0; 3520 buf_size_idxs_is = 0; 3521 for (i=0;i<n_recvs;i++) { 3522 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3523 buf_size_vals += (PetscInt)olengths_vals[i]; 3524 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3525 } 3526 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3527 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3528 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3529 3530 /* get new tags for clean communications */ 3531 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3532 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3533 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3534 3535 /* allocate for requests */ 3536 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3537 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3538 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3539 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3540 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3541 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3542 3543 /* communications */ 3544 ptr_idxs = recv_buffer_idxs; 3545 ptr_vals = recv_buffer_vals; 3546 ptr_idxs_is = recv_buffer_idxs_is; 3547 for (i=0;i<n_recvs;i++) { 3548 source_dest = onodes[i]; 3549 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3550 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3551 ptr_idxs += olengths_idxs[i]; 3552 ptr_vals += olengths_vals[i]; 3553 if (nis) { 3554 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3555 ptr_idxs_is += olengths_idxs_is[i]; 3556 } 3557 } 3558 for (i=0;i<n_sends;i++) { 3559 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3560 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3561 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3562 if (nis) { 3563 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3564 } 3565 } 3566 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3567 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3568 3569 /* assemble new l2g map */ 3570 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3571 ptr_idxs = recv_buffer_idxs; 3572 new_local_rows = 0; 3573 for (i=0;i<n_recvs;i++) { 3574 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3575 ptr_idxs += olengths_idxs[i]; 3576 } 3577 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3578 ptr_idxs = recv_buffer_idxs; 3579 new_local_rows = 0; 3580 for (i=0;i<n_recvs;i++) { 3581 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3582 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3583 ptr_idxs += olengths_idxs[i]; 3584 } 3585 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3586 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3587 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3588 3589 /* infer new local matrix type from received local matrices type */ 3590 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3591 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3592 if (n_recvs) { 3593 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3594 ptr_idxs = recv_buffer_idxs; 3595 for (i=0;i<n_recvs;i++) { 3596 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3597 new_local_type_private = MATAIJ_PRIVATE; 3598 break; 3599 } 3600 ptr_idxs += olengths_idxs[i]; 3601 } 3602 switch (new_local_type_private) { 3603 case MATDENSE_PRIVATE: 3604 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3605 new_local_type = MATSEQAIJ; 3606 bs = 1; 3607 } else { /* if I receive only 1 dense matrix */ 3608 new_local_type = MATSEQDENSE; 3609 bs = 1; 3610 } 3611 break; 3612 case MATAIJ_PRIVATE: 3613 new_local_type = MATSEQAIJ; 3614 bs = 1; 3615 break; 3616 case MATBAIJ_PRIVATE: 3617 new_local_type = MATSEQBAIJ; 3618 break; 3619 case MATSBAIJ_PRIVATE: 3620 new_local_type = MATSEQSBAIJ; 3621 break; 3622 default: 3623 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3624 break; 3625 } 3626 } else { /* by default, new_local_type is seqdense */ 3627 new_local_type = MATSEQDENSE; 3628 bs = 1; 3629 } 3630 3631 /* create MATIS object if needed */ 3632 if (reuse == MAT_INITIAL_MATRIX) { 3633 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3634 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3635 } else { 3636 /* it also destroys the local matrices */ 3637 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3638 } 3639 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3640 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3641 3642 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3643 3644 /* Global to local map of received indices */ 3645 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3646 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3647 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3648 3649 /* restore attributes -> type of incoming data and its size */ 3650 buf_size_idxs = 0; 3651 for (i=0;i<n_recvs;i++) { 3652 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3653 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3654 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3655 } 3656 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3657 3658 /* set preallocation */ 3659 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3660 if (!newisdense) { 3661 PetscInt *new_local_nnz=0; 3662 3663 ptr_vals = recv_buffer_vals; 3664 ptr_idxs = recv_buffer_idxs_local; 3665 if (n_recvs) { 3666 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3667 } 3668 for (i=0;i<n_recvs;i++) { 3669 PetscInt j; 3670 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3671 for (j=0;j<*(ptr_idxs+1);j++) { 3672 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3673 } 3674 } else { 3675 /* TODO */ 3676 } 3677 ptr_idxs += olengths_idxs[i]; 3678 } 3679 if (new_local_nnz) { 3680 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3681 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3682 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3683 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3684 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3685 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3686 } else { 3687 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3688 } 3689 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3690 } else { 3691 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3692 } 3693 3694 /* set values */ 3695 ptr_vals = recv_buffer_vals; 3696 ptr_idxs = recv_buffer_idxs_local; 3697 for (i=0;i<n_recvs;i++) { 3698 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3699 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3700 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3701 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3702 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3703 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3704 } else { 3705 /* TODO */ 3706 } 3707 ptr_idxs += olengths_idxs[i]; 3708 ptr_vals += olengths_vals[i]; 3709 } 3710 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3711 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3712 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3713 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3714 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3715 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3716 3717 #if 0 3718 if (!restrict_comm) { /* check */ 3719 Vec lvec,rvec; 3720 PetscReal infty_error; 3721 3722 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3723 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3724 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3725 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3726 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3727 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3728 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3729 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3730 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3731 } 3732 #endif 3733 3734 /* assemble new additional is (if any) */ 3735 if (nis) { 3736 PetscInt **temp_idxs,*count_is,j,psum; 3737 3738 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3739 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3740 ptr_idxs = recv_buffer_idxs_is; 3741 psum = 0; 3742 for (i=0;i<n_recvs;i++) { 3743 for (j=0;j<nis;j++) { 3744 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3745 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3746 psum += plen; 3747 ptr_idxs += plen+1; /* shift pointer to received data */ 3748 } 3749 } 3750 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3751 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3752 for (i=1;i<nis;i++) { 3753 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3754 } 3755 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3756 ptr_idxs = recv_buffer_idxs_is; 3757 for (i=0;i<n_recvs;i++) { 3758 for (j=0;j<nis;j++) { 3759 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3760 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3761 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3762 ptr_idxs += plen+1; /* shift pointer to received data */ 3763 } 3764 } 3765 for (i=0;i<nis;i++) { 3766 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3767 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3768 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3769 } 3770 ierr = PetscFree(count_is);CHKERRQ(ierr); 3771 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3772 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3773 } 3774 /* free workspace */ 3775 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3776 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3777 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3778 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3779 if (isdense) { 3780 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3781 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3782 } else { 3783 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3784 } 3785 if (nis) { 3786 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3787 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3788 } 3789 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3790 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3791 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3792 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3793 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3794 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3795 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3796 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3797 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3798 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3799 ierr = PetscFree(onodes);CHKERRQ(ierr); 3800 if (nis) { 3801 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3802 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3803 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3804 } 3805 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3806 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3807 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3808 for (i=0;i<nis;i++) { 3809 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3810 } 3811 } 3812 PetscFunctionReturn(0); 3813 } 3814 3815 /* temporary hack into ksp private data structure */ 3816 #include <petsc-private/kspimpl.h> 3817 3818 #undef __FUNCT__ 3819 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3820 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3821 { 3822 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3823 PC_IS *pcis = (PC_IS*)pc->data; 3824 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3825 MatNullSpace CoarseNullSpace=NULL; 3826 ISLocalToGlobalMapping coarse_islg; 3827 IS coarse_is,*isarray; 3828 PetscInt i,im_active=-1,active_procs=-1; 3829 PetscInt nis,nisdofs,nisneu; 3830 PC pc_temp; 3831 PCType coarse_pc_type; 3832 KSPType coarse_ksp_type; 3833 PetscBool multilevel_requested,multilevel_allowed; 3834 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3835 Mat t_coarse_mat_is; 3836 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3837 PetscMPIInt all_procs; 3838 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3839 PetscBool compute_vecs = PETSC_FALSE; 3840 PetscScalar *array; 3841 PetscErrorCode ierr; 3842 3843 PetscFunctionBegin; 3844 /* Assign global numbering to coarse dofs */ 3845 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3846 compute_vecs = PETSC_TRUE; 3847 PetscInt ocoarse_size; 3848 ocoarse_size = pcbddc->coarse_size; 3849 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3850 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3851 /* see if we can avoid some work */ 3852 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3853 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3854 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3855 coarse_reuse = PETSC_FALSE; 3856 } else { /* we can safely reuse already computed coarse matrix */ 3857 coarse_reuse = PETSC_TRUE; 3858 } 3859 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3860 coarse_reuse = PETSC_FALSE; 3861 } 3862 /* reset any subassembling information */ 3863 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3864 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3865 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3866 coarse_reuse = PETSC_TRUE; 3867 } 3868 3869 /* count "active" (i.e. with positive local size) and "void" processes */ 3870 im_active = !!(pcis->n); 3871 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3872 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3873 void_procs = all_procs-active_procs; 3874 csin_type_simple = PETSC_TRUE; 3875 redist = PETSC_FALSE; 3876 if (pcbddc->current_level && void_procs) { 3877 csin_ml = PETSC_TRUE; 3878 ncoarse_ml = void_procs; 3879 csin_ds = PETSC_TRUE; 3880 ncoarse_ds = void_procs; 3881 } else { 3882 csin_ml = PETSC_FALSE; 3883 ncoarse_ml = all_procs; 3884 if (void_procs) { 3885 csin_ds = PETSC_TRUE; 3886 ncoarse_ds = void_procs; 3887 csin_type_simple = PETSC_FALSE; 3888 } else { 3889 if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) { 3890 csin_ds = PETSC_TRUE; 3891 ncoarse_ds = pcbddc->redistribute_coarse; 3892 redist = PETSC_TRUE; 3893 } else { 3894 csin_ds = PETSC_FALSE; 3895 ncoarse_ds = all_procs; 3896 } 3897 } 3898 } 3899 3900 /* 3901 test if we can go multilevel: three conditions must be satisfied: 3902 - we have not exceeded the number of levels requested 3903 - we can actually subassemble the active processes 3904 - we can find a suitable number of MPI processes where we can place the subassembled problem 3905 */ 3906 multilevel_allowed = PETSC_FALSE; 3907 multilevel_requested = PETSC_FALSE; 3908 if (pcbddc->current_level < pcbddc->max_levels) { 3909 multilevel_requested = PETSC_TRUE; 3910 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3911 multilevel_allowed = PETSC_FALSE; 3912 } else { 3913 multilevel_allowed = PETSC_TRUE; 3914 } 3915 } 3916 /* determine number of process partecipating to coarse solver */ 3917 if (multilevel_allowed) { 3918 ncoarse = ncoarse_ml; 3919 csin = csin_ml; 3920 } else { 3921 ncoarse = ncoarse_ds; 3922 csin = csin_ds; 3923 } 3924 3925 /* creates temporary l2gmap and IS for coarse indexes */ 3926 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3927 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3928 3929 /* creates temporary MATIS object for coarse matrix */ 3930 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3931 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3932 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3933 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3934 #if 0 3935 { 3936 PetscViewer viewer; 3937 char filename[256]; 3938 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3939 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3940 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3941 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3942 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3943 } 3944 #endif 3945 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3946 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3947 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3948 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3949 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3950 3951 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3952 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3953 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3954 const PetscInt *idxs; 3955 ISLocalToGlobalMapping tmap; 3956 3957 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3958 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3959 /* allocate space for temporary storage */ 3960 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3961 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3962 /* allocate for IS array */ 3963 nisdofs = pcbddc->n_ISForDofsLocal; 3964 nisneu = !!pcbddc->NeumannBoundariesLocal; 3965 nis = nisdofs + nisneu; 3966 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3967 /* dofs splitting */ 3968 for (i=0;i<nisdofs;i++) { 3969 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3970 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3971 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3972 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3973 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3974 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3975 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3976 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3977 } 3978 /* neumann boundaries */ 3979 if (pcbddc->NeumannBoundariesLocal) { 3980 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3981 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3982 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3983 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3984 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3985 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3986 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3987 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3988 } 3989 /* free memory */ 3990 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3991 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3992 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3993 } else { 3994 nis = 0; 3995 nisdofs = 0; 3996 nisneu = 0; 3997 isarray = NULL; 3998 } 3999 /* destroy no longer needed map */ 4000 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4001 4002 /* restrict on coarse candidates (if needed) */ 4003 coarse_mat_is = NULL; 4004 if (csin) { 4005 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4006 if (redist) { 4007 PetscMPIInt rank; 4008 PetscInt spc,n_spc_p1,dest[1]; 4009 4010 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4011 spc = all_procs/pcbddc->redistribute_coarse; 4012 n_spc_p1 = all_procs%pcbddc->redistribute_coarse; 4013 if (rank > n_spc_p1*(spc+1)-1) { 4014 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4015 } else { 4016 dest[0] = rank/(spc+1); 4017 } 4018 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4019 } else { 4020 PetscInt j,tissize,*nisindices; 4021 PetscInt *coarse_candidates; 4022 const PetscInt* tisindices; 4023 /* get coarse candidates' ranks in pc communicator */ 4024 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4025 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4026 for (i=0,j=0;i<all_procs;i++) { 4027 if (!coarse_candidates[i]) { 4028 coarse_candidates[j]=i; 4029 j++; 4030 } 4031 } 4032 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 4033 /* get a suitable subassembling pattern */ 4034 if (csin_type_simple) { 4035 PetscMPIInt rank; 4036 PetscInt issize,isidx; 4037 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4038 if (im_active) { 4039 issize = 1; 4040 isidx = (PetscInt)rank; 4041 } else { 4042 issize = 0; 4043 isidx = -1; 4044 } 4045 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4046 } else { 4047 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4048 } 4049 if (pcbddc->dbg_flag) { 4050 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4051 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4052 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4053 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4054 for (i=0;i<j;i++) { 4055 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4056 } 4057 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4058 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4059 } 4060 /* shift the pattern on coarse candidates */ 4061 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4062 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4063 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4064 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4065 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4066 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4067 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4068 } 4069 } 4070 if (pcbddc->dbg_flag) { 4071 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4072 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4073 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4074 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4075 } 4076 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4077 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4078 } else { 4079 if (pcbddc->dbg_flag) { 4080 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4081 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4082 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4083 } 4084 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4085 coarse_mat_is = t_coarse_mat_is; 4086 } 4087 4088 /* create local to global scatters for coarse problem */ 4089 if (compute_vecs) { 4090 PetscInt lrows; 4091 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4092 if (coarse_mat_is) { 4093 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4094 } else { 4095 lrows = 0; 4096 } 4097 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4098 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4099 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4100 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4101 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4102 } 4103 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4104 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4105 4106 /* set defaults for coarse KSP and PC */ 4107 if (multilevel_allowed) { 4108 coarse_ksp_type = KSPRICHARDSON; 4109 coarse_pc_type = PCBDDC; 4110 } else { 4111 coarse_ksp_type = KSPPREONLY; 4112 coarse_pc_type = PCREDUNDANT; 4113 } 4114 4115 /* print some info if requested */ 4116 if (pcbddc->dbg_flag) { 4117 if (!multilevel_allowed) { 4118 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4119 if (multilevel_requested) { 4120 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4121 } else if (pcbddc->max_levels) { 4122 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4123 } 4124 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4125 } 4126 } 4127 4128 /* create the coarse KSP object only once with defaults */ 4129 if (coarse_mat_is) { 4130 MatReuse coarse_mat_reuse; 4131 PetscViewer dbg_viewer = NULL; 4132 if (pcbddc->dbg_flag) { 4133 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4134 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4135 } 4136 if (!pcbddc->coarse_ksp) { 4137 char prefix[256],str_level[16]; 4138 size_t len; 4139 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4140 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4141 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4142 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4143 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4144 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4145 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4146 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4147 /* prefix */ 4148 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4149 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4150 if (!pcbddc->current_level) { 4151 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4152 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4153 } else { 4154 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4155 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4156 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4157 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4158 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4159 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4160 } 4161 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4162 /* allow user customization */ 4163 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4164 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4165 } 4166 4167 /* get some info after set from options */ 4168 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4169 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4170 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4171 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4172 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4173 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4174 isbddc = PETSC_FALSE; 4175 } 4176 if (isredundant) { 4177 KSP inner_ksp; 4178 PC inner_pc; 4179 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4180 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4181 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4182 } 4183 4184 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4185 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4186 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4187 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4188 if (nisdofs) { 4189 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4190 for (i=0;i<nisdofs;i++) { 4191 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4192 } 4193 } 4194 if (nisneu) { 4195 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4196 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4197 } 4198 4199 /* assemble coarse matrix */ 4200 if (coarse_reuse) { 4201 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4202 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4203 coarse_mat_reuse = MAT_REUSE_MATRIX; 4204 } else { 4205 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4206 } 4207 if (isbddc || isnn) { 4208 if (pcbddc->coarsening_ratio > 1) { 4209 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4210 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4211 if (pcbddc->dbg_flag) { 4212 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4213 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4214 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4215 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4216 } 4217 } 4218 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4219 } else { 4220 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4221 coarse_mat = coarse_mat_is; 4222 } 4223 } else { 4224 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4225 } 4226 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4227 4228 /* propagate symmetry info to coarse matrix */ 4229 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4230 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4231 4232 /* set operators */ 4233 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4234 if (pcbddc->dbg_flag) { 4235 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4236 } 4237 } else { /* processes non partecipating to coarse solver (if any) */ 4238 coarse_mat = 0; 4239 } 4240 ierr = PetscFree(isarray);CHKERRQ(ierr); 4241 #if 0 4242 { 4243 PetscViewer viewer; 4244 char filename[256]; 4245 sprintf(filename,"coarse_mat.m"); 4246 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4247 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4248 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4249 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4250 } 4251 #endif 4252 4253 /* Compute coarse null space (special handling by BDDC only) */ 4254 if (pcbddc->NullSpace) { 4255 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4256 } 4257 4258 if (pcbddc->coarse_ksp) { 4259 Vec crhs,csol; 4260 PetscBool ispreonly; 4261 if (CoarseNullSpace) { 4262 if (isbddc) { 4263 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4264 } else { 4265 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4266 } 4267 } 4268 /* setup coarse ksp */ 4269 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4270 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4271 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4272 /* hack */ 4273 if (!csol) { 4274 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4275 } 4276 if (!crhs) { 4277 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4278 } 4279 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4280 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4281 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4282 KSP check_ksp; 4283 KSPType check_ksp_type; 4284 PC check_pc; 4285 Vec check_vec,coarse_vec; 4286 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4287 PetscInt its; 4288 PetscBool compute_eigs; 4289 PetscReal *eigs_r,*eigs_c; 4290 PetscInt neigs; 4291 const char *prefix; 4292 4293 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4294 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4295 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4296 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4297 if (ispreonly) { 4298 check_ksp_type = KSPPREONLY; 4299 compute_eigs = PETSC_FALSE; 4300 } else { 4301 check_ksp_type = KSPGMRES; 4302 compute_eigs = PETSC_TRUE; 4303 } 4304 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4305 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4306 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4307 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4308 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4309 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4310 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4311 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4312 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4313 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4314 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4315 /* create random vec */ 4316 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4317 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4318 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4319 if (CoarseNullSpace) { 4320 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4321 } 4322 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4323 /* solve coarse problem */ 4324 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4325 if (CoarseNullSpace) { 4326 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4327 } 4328 /* set eigenvalue estimation if preonly has not been requested */ 4329 if (compute_eigs) { 4330 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4331 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4332 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4333 lambda_max = eigs_r[neigs-1]; 4334 lambda_min = eigs_r[0]; 4335 if (pcbddc->use_coarse_estimates) { 4336 if (lambda_max>lambda_min) { 4337 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4338 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4339 } 4340 } 4341 } 4342 4343 /* check coarse problem residual error */ 4344 if (pcbddc->dbg_flag) { 4345 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4346 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4347 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4348 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4349 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4350 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4351 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4352 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4353 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4354 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4355 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4356 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4357 if (compute_eigs) { 4358 PetscReal lambda_max_s,lambda_min_s; 4359 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4360 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4361 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4362 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4363 for (i=0;i<neigs;i++) { 4364 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4365 } 4366 } 4367 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4368 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4369 } 4370 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4371 if (compute_eigs) { 4372 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4373 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4374 } 4375 } 4376 } 4377 /* print additional info */ 4378 if (pcbddc->dbg_flag) { 4379 /* waits until all processes reaches this point */ 4380 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4381 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4382 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4383 } 4384 4385 /* free memory */ 4386 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4387 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4388 PetscFunctionReturn(0); 4389 } 4390 4391 #undef __FUNCT__ 4392 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4393 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4394 { 4395 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4396 PC_IS* pcis = (PC_IS*)pc->data; 4397 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4398 PetscInt i,coarse_size; 4399 PetscInt *local_primal_indices; 4400 PetscErrorCode ierr; 4401 4402 PetscFunctionBegin; 4403 /* Compute global number of coarse dofs */ 4404 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4405 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4406 } 4407 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4408 4409 /* check numbering */ 4410 if (pcbddc->dbg_flag) { 4411 PetscScalar coarsesum,*array; 4412 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4413 4414 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4415 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4416 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4417 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4418 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4419 for (i=0;i<pcbddc->local_primal_size;i++) { 4420 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4421 } 4422 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4423 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4424 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4425 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4426 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4427 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4428 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4429 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4430 for (i=0;i<pcis->n;i++) { 4431 if (array[i] == 1.0) { 4432 set_error = PETSC_TRUE; 4433 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4434 } 4435 } 4436 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4437 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4438 for (i=0;i<pcis->n;i++) { 4439 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4440 } 4441 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4442 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4443 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4444 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4445 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4446 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4447 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4448 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4449 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4450 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4451 for (i=0;i<pcbddc->local_primal_size;i++) { 4452 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4453 } 4454 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4455 } 4456 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4457 if (set_error_reduced) { 4458 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4459 } 4460 } 4461 /* get back data */ 4462 *coarse_size_n = coarse_size; 4463 *local_primal_indices_n = local_primal_indices; 4464 PetscFunctionReturn(0); 4465 } 4466 4467 #undef __FUNCT__ 4468 #define __FUNCT__ "PCBDDCGlobalToLocal" 4469 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4470 { 4471 IS localis_t; 4472 PetscInt i,lsize,*idxs,n; 4473 PetscScalar *vals; 4474 PetscErrorCode ierr; 4475 4476 PetscFunctionBegin; 4477 /* get indices in local ordering exploiting local to global map */ 4478 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4479 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4480 for (i=0;i<lsize;i++) vals[i] = 1.0; 4481 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4482 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4483 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4484 if (idxs) { /* multilevel guard */ 4485 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4486 } 4487 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4488 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4489 ierr = PetscFree(vals);CHKERRQ(ierr); 4490 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4491 /* now compute set in local ordering */ 4492 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4493 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4494 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4495 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4496 for (i=0,lsize=0;i<n;i++) { 4497 if (PetscRealPart(vals[i]) > 0.5) { 4498 lsize++; 4499 } 4500 } 4501 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4502 for (i=0,lsize=0;i<n;i++) { 4503 if (PetscRealPart(vals[i]) > 0.5) { 4504 idxs[lsize++] = i; 4505 } 4506 } 4507 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4508 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4509 *localis = localis_t; 4510 PetscFunctionReturn(0); 4511 } 4512 4513 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4514 #undef __FUNCT__ 4515 #define __FUNCT__ "PCBDDCMatMult_Private" 4516 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4517 { 4518 PCBDDCChange_ctx change_ctx; 4519 PetscErrorCode ierr; 4520 4521 PetscFunctionBegin; 4522 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4523 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4524 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4525 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4526 PetscFunctionReturn(0); 4527 } 4528 4529 #undef __FUNCT__ 4530 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4531 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4532 { 4533 PCBDDCChange_ctx change_ctx; 4534 PetscErrorCode ierr; 4535 4536 PetscFunctionBegin; 4537 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4538 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4539 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4540 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4541 PetscFunctionReturn(0); 4542 } 4543 4544 #undef __FUNCT__ 4545 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4546 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4547 { 4548 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4549 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4550 PetscInt *used_xadj,*used_adjncy; 4551 PetscBool free_used_adj; 4552 PetscErrorCode ierr; 4553 4554 PetscFunctionBegin; 4555 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4556 free_used_adj = PETSC_FALSE; 4557 if (pcbddc->sub_schurs_layers == -1) { 4558 used_xadj = NULL; 4559 used_adjncy = NULL; 4560 } else { 4561 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4562 used_xadj = pcbddc->mat_graph->xadj; 4563 used_adjncy = pcbddc->mat_graph->adjncy; 4564 } else if (pcbddc->computed_rowadj) { 4565 used_xadj = pcbddc->mat_graph->xadj; 4566 used_adjncy = pcbddc->mat_graph->adjncy; 4567 } else { 4568 Mat mat_adj; 4569 PetscBool flg_row=PETSC_TRUE; 4570 const PetscInt *xadj,*adjncy; 4571 PetscInt nvtxs; 4572 4573 ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 4574 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4575 if (!flg_row) { 4576 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 4577 } 4578 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4579 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4580 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4581 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4582 if (!flg_row) { 4583 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 4584 } 4585 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 4586 free_used_adj = PETSC_TRUE; 4587 } 4588 } 4589 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4590 4591 /* free adjacency */ 4592 if (free_used_adj) { 4593 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4594 } 4595 PetscFunctionReturn(0); 4596 } 4597 4598 #undef __FUNCT__ 4599 #define __FUNCT__ "PCBDDCInitSubSchurs" 4600 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4601 { 4602 PC_IS *pcis=(PC_IS*)pc->data; 4603 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4604 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4605 PCBDDCGraph graph; 4606 Mat S_j; 4607 PetscErrorCode ierr; 4608 4609 PetscFunctionBegin; 4610 /* attach interface graph for determining subsets */ 4611 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4612 IS verticesIS; 4613 4614 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4615 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4616 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4617 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4618 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4619 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4620 /* 4621 if (pcbddc->dbg_flag) { 4622 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4623 } 4624 */ 4625 } else { 4626 graph = pcbddc->mat_graph; 4627 } 4628 4629 /* Create Schur complement matrix */ 4630 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4631 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4632 4633 /* sub_schurs init */ 4634 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr); 4635 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4636 /* free graph struct */ 4637 if (pcbddc->sub_schurs_rebuild) { 4638 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4639 } 4640 PetscFunctionReturn(0); 4641 } 4642