1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs,maxneigs_r,cumsubs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscBool symmetric; 26 PetscErrorCode ierr; 27 28 PetscFunctionBegin; 29 if (!sub_schurs->use_mumps) { 30 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 31 } 32 ierr = MatIsSymmetric(sub_schurs->A,0.0,&symmetric);CHKERRQ(ierr); 33 if (!symmetric) { 34 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented"); 35 } 36 37 /* max size of subsets */ 38 mss = 0; 39 for (i=0;i<sub_schurs->n_subs;i++) { 40 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 41 PetscInt subset_size; 42 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 43 mss = PetscMax(mss,subset_size); 44 } 45 } 46 47 /* min/max and threshold */ 48 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 49 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 1; 50 nmax = PetscMax(nmin,nmax); 51 if (pcbddc->adaptive_threshold > 1.0) { 52 thresh = 1.0/pcbddc->adaptive_threshold; 53 } else { 54 thresh = 1.0; 55 } 56 57 /* allocate lapack workspace */ 58 cum = cum2 = 0; 59 maxneigs = 0; 60 for (i=0;i<sub_schurs->n_subs;i++) { 61 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 62 PetscInt n,subset_size; 63 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 64 n = PetscMin(subset_size,nmax); 65 n = PetscMax(n,nmin); 66 cum += subset_size*n; 67 cum2 += n; 68 maxneigs = PetscMax(maxneigs,n); 69 } 70 } 71 72 if (mss) { 73 if (symmetric) { 74 PetscBLASInt B_itype = 1; 75 PetscBLASInt B_N = mss; 76 PetscScalar zero = 0.0; 77 PetscScalar eps = 0.0; /* dlamch? */ 78 79 B_lwork = -1; 80 S = NULL; 81 St = NULL; 82 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 83 #if defined(PETSC_USE_COMPLEX) 84 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 85 #else 86 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 87 #endif 88 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 89 ierr = PetscFPTrapPop();CHKERRQ(ierr); 90 } else { 91 /* TODO */ 92 } 93 } else { 94 lwork = 0; 95 } 96 97 nv = 0; 98 if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */ 99 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 100 } 101 ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr); 102 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 103 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 104 #if defined(PETSC_USE_COMPLEX) 105 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 106 #endif 107 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 108 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 109 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 110 nv+cum,&pcbddc->adaptive_constraints_idxs, 111 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 112 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 113 114 maxneigs = 0; 115 cum = cum2 = cumarray = 0; 116 if (sub_schurs->is_Ej_com) { 117 const PetscInt *idxs; 118 119 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 120 for (cum=0;cum<nv;cum++) { 121 pcbddc->adaptive_constraints_n[cum] = 1; 122 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 123 pcbddc->adaptive_constraints_ptrs[cum] = cum; 124 pcbddc->adaptive_constraints_data[cum] = 1.0; 125 } 126 cum2 = cum; 127 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 128 } 129 130 if (mss) { /* multilevel */ 131 if (pcbddc->use_deluxe_scaling) { 132 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 133 } else { 134 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 135 } 136 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 137 } 138 139 cumsubs = 0; 140 for (i=0;i<sub_schurs->n_subs;i++) { 141 PetscInt j,subset_size; 142 143 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 144 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 145 const PetscInt *idxs; 146 PetscScalar one = 1.0,zero=0.0; 147 PetscBLASInt B_N; 148 149 /* S should be copied since we need it for deluxe scaling */ 150 if (symmetric) { 151 PetscInt j; 152 for (j=0;j<subset_size;j++) { 153 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 154 } 155 for (j=0;j<subset_size;j++) { 156 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 157 } 158 } else { 159 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 160 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 161 } 162 /* is always this the right matrix? */ 163 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 164 165 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 166 /* St = Starray+cumarray; */ 167 168 if (symmetric) { 169 PetscBLASInt B_itype = 1; 170 PetscBLASInt B_IL = 1, B_IU; 171 PetscScalar eps = -1.0; /* dlamch? */ 172 173 /* ask for eigenvalues lower than thresh */ 174 PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs); 175 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 176 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 177 #if defined(PETSC_USE_COMPLEX) 178 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 179 #else 180 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 181 #endif 182 ierr = PetscFPTrapPop();CHKERRQ(ierr); 183 if (B_ierr) { 184 if (B_ierr < 0 ) { 185 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 186 } else if (B_ierr <= B_N) { 187 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 188 } else { 189 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d principal minor not posdef",(int)B_ierr-B_N-1); 190 } 191 } 192 193 if (B_neigs > nmax) { 194 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, more than required %d.\n",PetscGlobalRank,B_neigs,nmax); 195 B_neigs = nmax; 196 } 197 198 if (B_neigs < nmin) { 199 PetscBLASInt B_neigs2; 200 201 B_IL = B_neigs + 1; 202 ierr = PetscBLASIntCast(nmin,&B_IU);CHKERRQ(ierr); 203 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, less than required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU); 204 if (symmetric) { 205 PetscInt j; 206 for (j=0;j<subset_size;j++) { 207 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 208 } 209 for (j=0;j<subset_size;j++) { 210 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 211 } 212 } else { 213 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 214 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 215 } 216 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 217 #if defined(PETSC_USE_COMPLEX) 218 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 219 #else 220 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 221 #endif 222 ierr = PetscFPTrapPop();CHKERRQ(ierr); 223 B_neigs += B_neigs2; 224 } 225 if (B_ierr) { 226 if (B_ierr < 0 ) { 227 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 228 } else if (B_ierr <= B_N) { 229 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 230 } else { 231 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d principal minor not posdef",(int)B_ierr-B_N-1); 232 } 233 } 234 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Got %d eigs\n",PetscGlobalRank,B_neigs); 235 for (j=0;j<B_neigs;j++) { 236 if (eigs[j] == 0.0) { 237 PetscPrintf(PETSC_COMM_SELF,"[%d] Inf\n",PetscGlobalRank); 238 } else { 239 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.6e\n",PetscGlobalRank,1.0/eigs[j]); 240 } 241 } 242 } else { 243 /* TODO */ 244 } 245 maxneigs = PetscMax(B_neigs,maxneigs); 246 pcbddc->adaptive_constraints_n[cumsubs+nv] = B_neigs; 247 cumsubs++; 248 249 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 250 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N)); 251 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 252 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 253 for (j=0;j<B_neigs;j++) { 254 #if 0 255 { 256 PetscBLASInt Blas_N,Blas_one = 1.0; 257 PetscScalar norm; 258 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 259 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 260 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 261 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 262 } else { 263 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 264 } 265 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 266 } 267 #endif 268 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 269 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 270 cum2 += subset_size; 271 } 272 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 273 } 274 /* shift for next computation */ 275 cumarray += subset_size*subset_size; 276 } 277 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 278 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 279 280 if (mss) { 281 if (pcbddc->use_deluxe_scaling) { 282 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 283 } else { 284 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 285 } 286 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 287 } 288 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 289 #if defined(PETSC_USE_COMPLEX) 290 ierr = PetscFree(rwork);CHKERRQ(ierr); 291 #endif 292 if (pcbddc->dbg_flag) { 293 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 294 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 295 } 296 PetscFunctionReturn(0); 297 } 298 299 #undef __FUNCT__ 300 #define __FUNCT__ "PCBDDCSetUpSolvers" 301 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 302 { 303 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 304 PetscScalar *coarse_submat_vals; 305 PetscErrorCode ierr; 306 307 PetscFunctionBegin; 308 /* Setup local scatters R_to_B and (optionally) R_to_D */ 309 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 310 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 311 312 /* Setup local neumann solver ksp_R */ 313 /* PCBDDCSetUpLocalScatters should be called first! */ 314 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 315 316 /* Change global null space passed in by the user if change of basis has been requested */ 317 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 318 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 319 } 320 321 /* 322 Setup local correction and local part of coarse basis. 323 Gives back the dense local part of the coarse matrix in column major ordering 324 */ 325 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 326 327 /* Compute total number of coarse nodes and setup coarse solver */ 328 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 329 330 /* free */ 331 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 332 PetscFunctionReturn(0); 333 } 334 335 #undef __FUNCT__ 336 #define __FUNCT__ "PCBDDCResetCustomization" 337 PetscErrorCode PCBDDCResetCustomization(PC pc) 338 { 339 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 340 PetscErrorCode ierr; 341 342 PetscFunctionBegin; 343 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 344 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 345 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 346 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 347 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 348 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 349 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 350 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 351 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 352 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 353 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 354 PetscFunctionReturn(0); 355 } 356 357 #undef __FUNCT__ 358 #define __FUNCT__ "PCBDDCResetTopography" 359 PetscErrorCode PCBDDCResetTopography(PC pc) 360 { 361 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 362 PetscErrorCode ierr; 363 364 PetscFunctionBegin; 365 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 366 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 367 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 368 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 369 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 370 PetscFunctionReturn(0); 371 } 372 373 #undef __FUNCT__ 374 #define __FUNCT__ "PCBDDCResetSolvers" 375 PetscErrorCode PCBDDCResetSolvers(PC pc) 376 { 377 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 378 PetscErrorCode ierr; 379 380 PetscFunctionBegin; 381 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 382 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 383 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 384 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 385 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 386 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 387 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 388 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 389 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 390 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 391 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 392 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 393 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 394 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 395 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 396 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 397 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 398 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 399 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 400 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 401 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 402 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 403 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 404 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 410 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 411 { 412 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 413 PC_IS *pcis = (PC_IS*)pc->data; 414 VecType impVecType; 415 PetscInt n_constraints,n_R,old_size; 416 PetscErrorCode ierr; 417 418 PetscFunctionBegin; 419 if (!pcbddc->ConstraintMatrix) { 420 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 421 } 422 /* get sizes */ 423 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 424 n_R = pcis->n-pcbddc->n_actual_vertices; 425 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 426 /* local work vectors (try to avoid unneeded work)*/ 427 /* R nodes */ 428 old_size = -1; 429 if (pcbddc->vec1_R) { 430 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 431 } 432 if (n_R != old_size) { 433 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 434 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 435 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 436 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 437 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 438 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 439 } 440 /* local primal dofs */ 441 old_size = -1; 442 if (pcbddc->vec1_P) { 443 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 444 } 445 if (pcbddc->local_primal_size != old_size) { 446 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 447 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 449 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 450 } 451 /* local explicit constraints */ 452 old_size = -1; 453 if (pcbddc->vec1_C) { 454 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 455 } 456 if (n_constraints && n_constraints != old_size) { 457 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 458 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 459 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 460 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 461 } 462 PetscFunctionReturn(0); 463 } 464 465 #undef __FUNCT__ 466 #define __FUNCT__ "PCBDDCSetUpCorrection" 467 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 468 { 469 PetscErrorCode ierr; 470 /* pointers to pcis and pcbddc */ 471 PC_IS* pcis = (PC_IS*)pc->data; 472 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 473 /* submatrices of local problem */ 474 Mat A_RV,A_VR,A_VV; 475 /* working matrices */ 476 Mat M1,M2,M3,C_CR; 477 /* working vectors */ 478 Vec vec1_C,vec2_C,vec1_V,vec2_V; 479 /* additional working stuff */ 480 IS is_aux; 481 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 482 const PetscScalar *array,*row_cmat_values; 483 const PetscInt *row_cmat_indices,*idx_R_local; 484 PetscInt *idx_V_B,*auxindices; 485 PetscInt n_vertices,n_constraints,size_of_constraint; 486 PetscInt i,j,n_R,n_D,n_B; 487 PetscBool unsymmetric_check; 488 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 489 MatType impMatType; 490 /* some shortcuts to scalars */ 491 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 492 /* for debugging purposes */ 493 PetscReal *coarsefunctions_errors,*constraints_errors; 494 495 PetscFunctionBegin; 496 /* get number of vertices (corners plus constraints with change of basis) 497 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 498 n_vertices = pcbddc->n_actual_vertices; 499 n_constraints = pcbddc->local_primal_size-n_vertices; 500 /* Set Non-overlapping dimensions */ 501 n_B = pcis->n_B; n_D = pcis->n - n_B; 502 n_R = pcis->n-n_vertices; 503 504 /* Set types for local objects needed by BDDC precondtioner */ 505 impMatType = MATSEQDENSE; 506 507 /* Allocating some extra storage just to be safe */ 508 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 509 for (i=0;i<pcis->n;i++) auxindices[i]=i; 510 511 /* vertices in boundary numbering */ 512 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 513 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 514 if (i != n_vertices) { 515 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 516 } 517 518 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 519 if (n_constraints) { 520 /* see if we can save some allocations */ 521 if (pcbddc->local_auxmat2) { 522 PetscInt on_R,on_constraints; 523 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 524 if (on_R != n_R || on_constraints != n_constraints) { 525 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 526 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 527 } 528 } 529 /* work vectors */ 530 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 531 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 532 /* auxiliary matrices */ 533 if (!pcbddc->local_auxmat2) { 534 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 535 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 536 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 537 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 538 } 539 540 /* Extract constraints on R nodes: C_{CR} */ 541 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 542 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 543 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 544 545 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 546 for (i=0;i<n_constraints;i++) { 547 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 548 /* Get row of constraint matrix in R numbering */ 549 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 550 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 551 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 552 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 553 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 554 /* Solve for row of constraint matrix in R numbering */ 555 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 556 /* Set values in local_auxmat2 */ 557 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 558 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 559 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 560 } 561 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 562 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 563 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 564 565 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 566 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 567 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 568 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 569 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 570 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 571 ierr = MatSetUp(M1);CHKERRQ(ierr); 572 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 573 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 574 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 575 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 576 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 577 ierr = MatDestroy(&M2);CHKERRQ(ierr); 578 ierr = MatDestroy(&M3);CHKERRQ(ierr); 579 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 580 if (!pcbddc->local_auxmat1) { 581 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 582 } else { 583 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 584 } 585 } 586 587 /* Get submatrices from subdomain matrix */ 588 if (n_vertices) { 589 PetscInt ibs,mbs; 590 PetscBool issbaij; 591 Mat newmat; 592 593 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 594 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 595 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 596 if (ibs != mbs) { /* need to convert to SEQAIJ */ 597 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 598 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 599 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 600 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 601 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 602 } else { 603 /* this is safe */ 604 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 605 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 606 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 607 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 608 /* which of the two approaches is faster? */ 609 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 610 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 611 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 612 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 613 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 614 } else { 615 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 616 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 617 } 618 } 619 ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 620 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 621 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 622 } 623 624 /* Matrix of coarse basis functions (local) */ 625 if (pcbddc->coarse_phi_B) { 626 PetscInt on_B,on_primal; 627 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 628 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 629 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 630 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 631 } 632 } 633 if (pcbddc->coarse_phi_D) { 634 PetscInt on_D,on_primal; 635 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 636 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 637 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 638 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 639 } 640 } 641 if (!pcbddc->coarse_phi_B) { 642 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 643 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 644 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 645 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 646 } 647 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 648 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 649 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 650 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 651 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 652 } 653 654 if (pcbddc->dbg_flag) { 655 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 656 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 657 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 658 } 659 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 660 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 661 662 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 663 664 /* vertices */ 665 for (i=0;i<n_vertices;i++) { 666 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 667 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 668 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 669 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 670 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 671 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 672 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 673 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 674 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 675 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 676 if (n_constraints) { 677 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 678 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 679 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 680 } 681 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 682 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 683 684 /* Set values in coarse basis function and subdomain part of coarse_mat */ 685 /* coarse basis functions */ 686 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 687 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 688 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 689 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 690 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 691 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 692 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 693 if (pcbddc->switch_static || pcbddc->dbg_flag) { 694 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 695 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 696 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 697 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 698 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 699 } 700 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 701 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 702 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 703 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 704 if (n_constraints) { 705 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 706 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 707 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 708 } 709 710 /* check */ 711 if (pcbddc->dbg_flag) { 712 /* assemble subdomain vector on local nodes */ 713 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 714 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 715 if (n_R) { 716 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 717 } 718 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 719 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 720 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 721 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 722 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 723 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 724 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 725 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 726 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 727 if (n_constraints) { 728 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 729 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 730 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 731 } 732 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 733 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 734 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 735 /* check saddle point solution */ 736 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 737 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 738 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 739 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 740 /* shift by the identity matrix */ 741 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 742 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 743 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 744 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 745 } 746 } 747 748 /* constraints */ 749 for (i=0;i<n_constraints;i++) { 750 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 751 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 752 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 753 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 754 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 755 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 756 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 757 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 758 if (n_vertices) { 759 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 760 } 761 /* Set values in coarse basis function and subdomain part of coarse_mat */ 762 /* coarse basis functions */ 763 j = i+n_vertices; /* don't touch this! */ 764 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 765 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 766 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 767 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 768 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 769 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 770 if (pcbddc->switch_static || pcbddc->dbg_flag) { 771 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 772 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 773 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 774 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 775 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 776 } 777 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 778 if (n_vertices) { 779 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 780 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 781 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 782 } 783 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 784 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 785 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 786 787 if (pcbddc->dbg_flag) { 788 /* assemble subdomain vector on nodes */ 789 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 790 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 791 if (n_R) { 792 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 793 } 794 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 795 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 796 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 797 /* assemble subdomain vector of lagrange multipliers */ 798 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 799 if (n_vertices) { 800 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 801 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 802 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 803 } 804 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 805 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 806 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 807 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 808 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 809 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 810 /* check saddle point solution */ 811 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 812 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 813 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 814 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 815 /* shift by the identity matrix */ 816 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 817 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 818 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 819 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 820 } 821 } 822 /* call assembling routines for local coarse basis */ 823 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 824 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 825 if (pcbddc->switch_static || pcbddc->dbg_flag) { 826 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 827 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 828 } 829 830 /* compute other basis functions for non-symmetric problems */ 831 if (!pcbddc->issym) { 832 if (!pcbddc->coarse_psi_B) { 833 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 834 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 835 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 836 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 837 } 838 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 839 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 840 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 841 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 842 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 843 } 844 for (i=0;i<pcbddc->local_primal_size;i++) { 845 if (n_constraints) { 846 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 847 for (j=0;j<n_constraints;j++) { 848 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 849 } 850 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 851 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 852 } 853 if (i<n_vertices) { 854 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 855 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 856 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 857 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 858 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 859 if (n_constraints) { 860 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 861 } 862 } else { 863 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 864 } 865 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 866 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 867 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 870 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 871 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 872 if (i<n_vertices) { 873 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 874 } 875 if (pcbddc->switch_static || pcbddc->dbg_flag) { 876 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 877 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 878 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 879 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 880 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 881 } 882 883 if (pcbddc->dbg_flag) { 884 /* assemble subdomain vector on nodes */ 885 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 886 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 887 if (n_R) { 888 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 889 } 890 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 891 if (i<n_vertices) { 892 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 893 } 894 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 895 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 896 /* assemble subdomain vector of lagrange multipliers */ 897 for (j=0;j<pcbddc->local_primal_size;j++) { 898 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 899 } 900 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 901 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 902 /* check saddle point solution */ 903 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 904 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 905 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 906 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 907 /* shift by the identity matrix */ 908 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 909 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 910 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 911 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 912 } 913 } 914 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 915 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 916 if (pcbddc->switch_static || pcbddc->dbg_flag) { 917 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 918 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 919 } 920 unsymmetric_check = PETSC_TRUE; 921 } else { /* take references to already computed coarse basis */ 922 unsymmetric_check = PETSC_FALSE; 923 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 924 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 925 if (pcbddc->coarse_phi_D) { 926 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 927 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 928 } 929 } 930 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 931 /* Checking coarse_sub_mat and coarse basis functios */ 932 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 933 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 934 if (pcbddc->dbg_flag) { 935 Mat coarse_sub_mat; 936 Mat AUXMAT,TM1,TM2,TM3,TM4; 937 Mat coarse_phi_D,coarse_phi_B; 938 Mat coarse_psi_D,coarse_psi_B; 939 Mat A_II,A_BB,A_IB,A_BI; 940 MatType checkmattype=MATSEQAIJ; 941 PetscReal real_value; 942 943 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 944 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 945 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 946 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 947 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 948 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 949 if (unsymmetric_check) { 950 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 951 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 952 } 953 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 954 955 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 956 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 957 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 958 if (unsymmetric_check) { 959 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 960 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 961 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 962 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 963 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 964 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 965 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 966 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 967 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 968 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 969 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 970 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 971 } else { 972 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 973 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 974 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 975 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 976 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 977 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 978 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 979 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 980 } 981 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 982 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 983 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 984 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 985 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 986 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 987 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 988 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 989 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 990 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 991 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 992 for (i=0;i<pcbddc->local_primal_size;i++) { 993 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 994 } 995 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 996 for (i=0;i<pcbddc->local_primal_size;i++) { 997 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 998 } 999 if (unsymmetric_check) { 1000 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 1001 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 1002 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 1003 } 1004 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 1005 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 1006 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 1007 } 1008 } 1009 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1010 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1011 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1012 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1013 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1014 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1015 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1016 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1017 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1018 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1019 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1020 if (unsymmetric_check) { 1021 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1022 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1023 } 1024 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1025 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 1026 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 1027 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 1028 } 1029 /* free memory */ 1030 if (n_vertices) { 1031 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 1032 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 1033 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1034 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1035 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1036 } 1037 if (n_constraints) { 1038 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 1039 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 1040 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1041 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1042 } 1043 ierr = PetscFree(auxindices);CHKERRQ(ierr); 1044 /* get back data */ 1045 *coarse_submat_vals_n = coarse_submat_vals; 1046 PetscFunctionReturn(0); 1047 } 1048 1049 #undef __FUNCT__ 1050 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1051 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B) 1052 { 1053 Mat *work_mat; 1054 IS isrow_s,iscol_s; 1055 PetscBool rsorted,csorted; 1056 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1057 PetscErrorCode ierr; 1058 1059 PetscFunctionBegin; 1060 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1061 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1062 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1063 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1064 1065 if (!rsorted) { 1066 const PetscInt *idxs; 1067 PetscInt *idxs_sorted,i; 1068 1069 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1070 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1071 for (i=0;i<rsize;i++) { 1072 idxs_perm_r[i] = i; 1073 } 1074 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1075 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1076 for (i=0;i<rsize;i++) { 1077 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1078 } 1079 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1080 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1081 } else { 1082 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1083 isrow_s = isrow; 1084 } 1085 1086 if (!csorted) { 1087 if (isrow == iscol) { 1088 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1089 iscol_s = isrow_s; 1090 } else { 1091 const PetscInt *idxs; 1092 PetscInt *idxs_sorted,i; 1093 1094 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1095 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1096 for (i=0;i<csize;i++) { 1097 idxs_perm_c[i] = i; 1098 } 1099 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1100 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1101 for (i=0;i<csize;i++) { 1102 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1103 } 1104 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1105 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1106 } 1107 } else { 1108 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1109 iscol_s = iscol; 1110 } 1111 1112 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr); 1113 1114 if (!rsorted || !csorted) { 1115 Mat new_mat; 1116 IS is_perm_r,is_perm_c; 1117 1118 if (!rsorted) { 1119 PetscInt *idxs_r,i; 1120 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1121 for (i=0;i<rsize;i++) { 1122 idxs_r[idxs_perm_r[i]] = i; 1123 } 1124 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1125 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1126 } else { 1127 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1128 } 1129 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1130 1131 if (!csorted) { 1132 if (isrow_s == iscol_s) { 1133 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1134 is_perm_c = is_perm_r; 1135 } else { 1136 PetscInt *idxs_c,i; 1137 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1138 for (i=0;i<csize;i++) { 1139 idxs_c[idxs_perm_c[i]] = i; 1140 } 1141 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1142 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1143 } 1144 } else { 1145 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1146 } 1147 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1148 1149 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1150 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1151 work_mat[0] = new_mat; 1152 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1153 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1154 } 1155 1156 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1157 *B = work_mat[0]; 1158 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1159 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1160 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1161 PetscFunctionReturn(0); 1162 } 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1166 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1167 { 1168 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1169 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1170 Mat new_mat; 1171 IS is_local,is_global; 1172 PetscInt local_size; 1173 PetscBool isseqaij; 1174 PetscErrorCode ierr; 1175 1176 PetscFunctionBegin; 1177 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1178 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1179 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1180 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1181 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1182 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr); 1183 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1184 1185 /* check */ 1186 if (pcbddc->dbg_flag) { 1187 Vec x,x_change; 1188 PetscReal error; 1189 1190 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1191 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1192 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1193 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1194 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1195 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1196 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1197 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1198 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1199 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1200 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1201 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1202 ierr = VecDestroy(&x);CHKERRQ(ierr); 1203 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1204 } 1205 1206 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1207 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1208 if (isseqaij) { 1209 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1210 } else { 1211 Mat work_mat; 1212 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1213 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1214 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1215 } 1216 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1217 /* 1218 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1219 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1220 */ 1221 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1222 PetscFunctionReturn(0); 1223 } 1224 1225 #undef __FUNCT__ 1226 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1227 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1228 { 1229 PC_IS* pcis = (PC_IS*)(pc->data); 1230 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1231 IS is_aux1,is_aux2; 1232 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1233 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1234 PetscInt vbs,bs; 1235 PetscBT bitmask; 1236 PetscErrorCode ierr; 1237 1238 PetscFunctionBegin; 1239 /* 1240 No need to setup local scatters if 1241 - primal space is unchanged 1242 AND 1243 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1244 AND 1245 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1246 */ 1247 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1248 PetscFunctionReturn(0); 1249 } 1250 /* destroy old objects */ 1251 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1252 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1253 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1254 /* Set Non-overlapping dimensions */ 1255 n_B = pcis->n_B; n_D = pcis->n - n_B; 1256 n_vertices = pcbddc->n_actual_vertices; 1257 /* create auxiliary bitmask */ 1258 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1259 for (i=0;i<n_vertices;i++) { 1260 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1261 } 1262 1263 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1264 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1265 for (i=0, n_R=0; i<pcis->n; i++) { 1266 if (!PetscBTLookup(bitmask,i)) { 1267 idx_R_local[n_R] = i; 1268 n_R++; 1269 } 1270 } 1271 1272 /* Block code */ 1273 vbs = 1; 1274 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1275 if (bs>1 && !(n_vertices%bs)) { 1276 PetscBool is_blocked = PETSC_TRUE; 1277 PetscInt *vary; 1278 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1279 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1280 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1281 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1282 for (i=0; i<n_vertices; i++) { 1283 if (vary[i]!=0 && vary[i]!=bs) { 1284 is_blocked = PETSC_FALSE; 1285 break; 1286 } 1287 } 1288 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1289 vbs = bs; 1290 for (i=0;i<n_R/vbs;i++) { 1291 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1292 } 1293 } 1294 ierr = PetscFree(vary);CHKERRQ(ierr); 1295 } 1296 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1297 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1298 1299 /* print some info if requested */ 1300 if (pcbddc->dbg_flag) { 1301 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1302 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1303 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1304 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1305 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1306 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1307 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1308 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1309 } 1310 1311 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1312 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1313 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1314 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1315 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1316 for (i=0; i<n_D; i++) { 1317 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1318 } 1319 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1320 for (i=0, j=0; i<n_R; i++) { 1321 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1322 aux_array1[j++] = i; 1323 } 1324 } 1325 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1326 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1327 for (i=0, j=0; i<n_B; i++) { 1328 if (!PetscBTLookup(bitmask,is_indices[i])) { 1329 aux_array2[j++] = i; 1330 } 1331 } 1332 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1333 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1334 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1335 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1336 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1337 1338 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1339 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1340 for (i=0, j=0; i<n_R; i++) { 1341 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1342 aux_array1[j++] = i; 1343 } 1344 } 1345 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1346 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1347 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1348 } 1349 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1350 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1351 PetscFunctionReturn(0); 1352 } 1353 1354 1355 #undef __FUNCT__ 1356 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1357 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1358 { 1359 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1360 PC_IS *pcis = (PC_IS*)pc->data; 1361 PC pc_temp; 1362 Mat A_RR; 1363 MatReuse reuse; 1364 PetscScalar m_one = -1.0; 1365 PetscReal value; 1366 PetscInt n_D,n_R,ibs,mbs; 1367 PetscBool use_exact,use_exact_reduced,issbaij; 1368 PetscErrorCode ierr; 1369 /* prefixes stuff */ 1370 char dir_prefix[256],neu_prefix[256],str_level[16]; 1371 size_t len; 1372 1373 PetscFunctionBegin; 1374 1375 /* compute prefixes */ 1376 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1377 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1378 if (!pcbddc->current_level) { 1379 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1380 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1381 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1382 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1383 } else { 1384 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1385 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1386 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1387 len -= 15; /* remove "pc_bddc_coarse_" */ 1388 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1389 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1390 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1391 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1392 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1393 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1394 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1395 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1396 } 1397 1398 /* DIRICHLET PROBLEM */ 1399 if (dirichlet) { 1400 /* Matrix for Dirichlet problem is pcis->A_II */ 1401 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1402 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1403 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1404 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1405 /* default */ 1406 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1407 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1408 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1409 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1410 if (issbaij) { 1411 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1412 } else { 1413 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1414 } 1415 /* Allow user's customization */ 1416 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1417 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1418 } 1419 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1420 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1421 if (!n_D) { 1422 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1423 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1424 } 1425 /* Set Up KSP for Dirichlet problem of BDDC */ 1426 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1427 /* set ksp_D into pcis data */ 1428 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1429 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1430 pcis->ksp_D = pcbddc->ksp_D; 1431 } 1432 1433 /* NEUMANN PROBLEM */ 1434 A_RR = 0; 1435 if (neumann) { 1436 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1437 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1438 if (pcbddc->ksp_R) { /* already created ksp */ 1439 PetscInt nn_R; 1440 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1441 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1442 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1443 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1444 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1445 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1446 reuse = MAT_INITIAL_MATRIX; 1447 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1448 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1449 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1450 reuse = MAT_INITIAL_MATRIX; 1451 } else { /* safe to reuse the matrix */ 1452 reuse = MAT_REUSE_MATRIX; 1453 } 1454 } 1455 /* last check */ 1456 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1457 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1458 reuse = MAT_INITIAL_MATRIX; 1459 } 1460 } else { /* first time, so we need to create the matrix */ 1461 reuse = MAT_INITIAL_MATRIX; 1462 } 1463 /* extract A_RR */ 1464 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1465 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1466 if (ibs != mbs) { 1467 Mat newmat; 1468 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1469 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1470 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1471 } else { 1472 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1473 } 1474 if (!pcbddc->ksp_R) { /* create object if not present */ 1475 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1476 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1477 /* default */ 1478 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1479 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1480 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1481 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1482 if (issbaij) { 1483 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1484 } else { 1485 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1486 } 1487 /* Allow user's customization */ 1488 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1489 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1490 } 1491 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1492 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1493 if (!n_R) { 1494 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1495 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1496 } 1497 /* Set Up KSP for Neumann problem of BDDC */ 1498 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1499 } 1500 1501 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1502 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1503 if (pcbddc->dbg_flag) { 1504 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1505 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1506 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1507 } 1508 if (dirichlet) { /* Dirichlet */ 1509 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1510 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1511 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1512 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1513 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1514 /* need to be adapted? */ 1515 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1516 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1517 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1518 /* print info */ 1519 if (pcbddc->dbg_flag) { 1520 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1521 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1522 } 1523 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1524 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1525 } 1526 } 1527 if (neumann) { /* Neumann */ 1528 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1529 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1530 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1531 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1532 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1533 /* need to be adapted? */ 1534 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1535 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1536 /* print info */ 1537 if (pcbddc->dbg_flag) { 1538 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1539 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1540 } 1541 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1542 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1543 } 1544 } 1545 } 1546 /* free Neumann problem's matrix */ 1547 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1548 PetscFunctionReturn(0); 1549 } 1550 1551 #undef __FUNCT__ 1552 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1553 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1554 { 1555 PetscErrorCode ierr; 1556 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1557 1558 PetscFunctionBegin; 1559 if (applytranspose) { 1560 if (pcbddc->local_auxmat1) { 1561 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1562 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1563 } 1564 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1565 } else { 1566 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1567 if (pcbddc->local_auxmat1) { 1568 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1569 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1570 } 1571 } 1572 PetscFunctionReturn(0); 1573 } 1574 1575 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1576 #undef __FUNCT__ 1577 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1578 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1579 { 1580 PetscErrorCode ierr; 1581 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1582 PC_IS* pcis = (PC_IS*) (pc->data); 1583 const PetscScalar zero = 0.0; 1584 1585 PetscFunctionBegin; 1586 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1587 if (applytranspose) { 1588 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1589 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1590 } else { 1591 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1592 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1593 } 1594 /* start communications from local primal nodes to rhs of coarse solver */ 1595 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1596 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1597 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1598 1599 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1600 /* TODO remove null space when doing multilevel */ 1601 if (pcbddc->coarse_ksp) { 1602 if (applytranspose) { 1603 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1604 } else { 1605 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1606 } 1607 } 1608 1609 /* Local solution on R nodes */ 1610 if (pcis->n) { 1611 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1612 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1613 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1614 if (pcbddc->switch_static) { 1615 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1616 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1617 } 1618 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1619 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1620 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1621 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1622 if (pcbddc->switch_static) { 1623 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1624 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1625 } 1626 } 1627 1628 /* communications from coarse sol to local primal nodes */ 1629 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1630 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1631 1632 /* Sum contributions from two levels */ 1633 if (applytranspose) { 1634 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1635 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1636 } else { 1637 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1638 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1639 } 1640 PetscFunctionReturn(0); 1641 } 1642 1643 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1644 #undef __FUNCT__ 1645 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1646 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1647 { 1648 PetscErrorCode ierr; 1649 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1650 PetscScalar *array,*array2; 1651 Vec from,to; 1652 1653 PetscFunctionBegin; 1654 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1655 from = pcbddc->coarse_vec; 1656 to = pcbddc->vec1_P; 1657 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1658 Vec tvec; 1659 PetscInt lsize; 1660 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1661 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1662 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1663 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1664 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1665 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1666 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1667 } 1668 } else { /* from local to global -> put data in coarse right hand side */ 1669 from = pcbddc->vec1_P; 1670 to = pcbddc->coarse_vec; 1671 } 1672 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1673 PetscFunctionReturn(0); 1674 } 1675 1676 #undef __FUNCT__ 1677 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1678 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1679 { 1680 PetscErrorCode ierr; 1681 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1682 PetscScalar *array,*array2; 1683 Vec from,to; 1684 1685 PetscFunctionBegin; 1686 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1687 from = pcbddc->coarse_vec; 1688 to = pcbddc->vec1_P; 1689 } else { /* from local to global -> put data in coarse right hand side */ 1690 from = pcbddc->vec1_P; 1691 to = pcbddc->coarse_vec; 1692 } 1693 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1694 if (smode == SCATTER_FORWARD) { 1695 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1696 Vec tvec; 1697 PetscInt lsize; 1698 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1699 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1700 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1701 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1702 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1703 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1704 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1705 } 1706 } 1707 PetscFunctionReturn(0); 1708 } 1709 1710 /* uncomment for testing purposes */ 1711 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1712 #undef __FUNCT__ 1713 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1714 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1715 { 1716 PetscErrorCode ierr; 1717 PC_IS* pcis = (PC_IS*)(pc->data); 1718 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1719 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1720 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1721 MatType impMatType=MATSEQAIJ; 1722 /* one and zero */ 1723 PetscScalar one=1.0,zero=0.0; 1724 /* space to store constraints and their local indices */ 1725 PetscScalar *temp_quadrature_constraint; 1726 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1727 /* iterators */ 1728 PetscInt i,j,k,total_counts,temp_start_ptr; 1729 /* stuff to store connected components stored in pcbddc->mat_graph */ 1730 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1731 PetscInt n_ISForFaces,n_ISForEdges; 1732 /* near null space stuff */ 1733 MatNullSpace nearnullsp; 1734 const Vec *nearnullvecs; 1735 Vec *localnearnullsp; 1736 PetscBool nnsp_has_cnst; 1737 PetscInt nnsp_size; 1738 PetscScalar *array; 1739 /* BLAS integers */ 1740 PetscBLASInt lwork,lierr; 1741 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1742 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1743 /* LAPACK working arrays for SVD or POD */ 1744 PetscBool skip_lapack; 1745 PetscScalar *work; 1746 PetscReal *singular_vals; 1747 #if defined(PETSC_USE_COMPLEX) 1748 PetscReal *rwork; 1749 #endif 1750 #if defined(PETSC_MISSING_LAPACK_GESVD) 1751 PetscBLASInt Blas_one_2=1; 1752 PetscScalar *temp_basis,*correlation_mat; 1753 #else 1754 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1755 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1756 #endif 1757 /* reuse */ 1758 PetscInt olocal_primal_size; 1759 PetscInt *oprimal_indices_local_idxs; 1760 /* change of basis */ 1761 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1762 PetscBool boolforchange,qr_needed; 1763 PetscBT touched,change_basis,qr_needed_idx; 1764 /* auxiliary stuff */ 1765 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1766 PetscInt ncc,*gidxs,*permutation,*temp_indices_to_constraint_work; 1767 PetscScalar *temp_quadrature_constraint_work; 1768 /* some quantities */ 1769 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1770 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1771 1772 1773 PetscFunctionBegin; 1774 /* Destroy Mat objects computed previously */ 1775 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1776 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1777 /* TODO synch with adaptive selection */ 1778 /* Get index sets for faces, edges and vertices from graph */ 1779 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1780 /* free unneeded index sets */ 1781 if (!pcbddc->use_vertices) { 1782 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1783 } 1784 if (!pcbddc->use_edges) { 1785 for (i=0;i<n_ISForEdges;i++) { 1786 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1787 } 1788 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1789 n_ISForEdges = 0; 1790 } 1791 if (!pcbddc->use_faces) { 1792 for (i=0;i<n_ISForFaces;i++) { 1793 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1794 } 1795 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1796 n_ISForFaces = 0; 1797 } 1798 /* HACKS (the following two blocks of code) */ 1799 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1800 pcbddc->use_change_of_basis = PETSC_TRUE; 1801 if (!ISForEdges) { 1802 pcbddc->use_change_on_faces = PETSC_TRUE; 1803 } 1804 } 1805 if (pcbddc->NullSpace) { 1806 /* use_change_of_basis should be consistent among processors */ 1807 PetscBool tbool[2],gbool[2]; 1808 tbool [0] = pcbddc->use_change_of_basis; 1809 tbool [1] = pcbddc->use_change_on_faces; 1810 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1811 pcbddc->use_change_of_basis = gbool[0]; 1812 pcbddc->use_change_on_faces = gbool[1]; 1813 } 1814 /* print some info */ 1815 if (pcbddc->dbg_flag) { 1816 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1817 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1818 i = 0; 1819 if (ISForVertices) { 1820 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1821 } 1822 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1823 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1824 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1825 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1826 } 1827 1828 if (!pcbddc->adaptive_selection) { 1829 /* check if near null space is attached to global mat */ 1830 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1831 if (nearnullsp) { 1832 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1833 /* remove any stored info */ 1834 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1835 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1836 /* store information for BDDC solver reuse */ 1837 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1838 pcbddc->onearnullspace = nearnullsp; 1839 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1840 for (i=0;i<nnsp_size;i++) { 1841 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1842 } 1843 } else { /* if near null space is not provided BDDC uses constants by default */ 1844 nnsp_size = 0; 1845 nnsp_has_cnst = PETSC_TRUE; 1846 } 1847 /* get max number of constraints on a single cc */ 1848 max_constraints = nnsp_size; 1849 if (nnsp_has_cnst) max_constraints++; 1850 1851 /* 1852 Evaluate maximum storage size needed by the procedure 1853 - temp_indices will contain start index of each constraint stored as follows 1854 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1855 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1856 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1857 */ 1858 total_counts = n_ISForFaces+n_ISForEdges; 1859 total_counts *= max_constraints; 1860 n_vertices = 0; 1861 if (ISForVertices) { 1862 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1863 } 1864 total_counts += n_vertices; 1865 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1866 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1867 total_counts = 0; 1868 max_size_of_constraint = 0; 1869 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1870 if (i<n_ISForEdges) { 1871 used_IS = &ISForEdges[i]; 1872 } else { 1873 used_IS = &ISForFaces[i-n_ISForEdges]; 1874 } 1875 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1876 total_counts += j; 1877 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1878 } 1879 total_counts *= max_constraints; 1880 total_counts += n_vertices; 1881 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1882 /* get local part of global near null space vectors */ 1883 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1884 for (k=0;k<nnsp_size;k++) { 1885 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1886 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1887 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1888 } 1889 1890 /* whether or not to skip lapack calls */ 1891 skip_lapack = PETSC_TRUE; 1892 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1893 1894 /* allocate some auxiliary stuff */ 1895 if (!skip_lapack || pcbddc->use_qr_single) { 1896 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1897 } 1898 1899 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1900 if (!skip_lapack) { 1901 PetscScalar temp_work; 1902 1903 #if defined(PETSC_MISSING_LAPACK_GESVD) 1904 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1905 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1906 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1907 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1908 #if defined(PETSC_USE_COMPLEX) 1909 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1910 #endif 1911 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1912 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1913 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1914 lwork = -1; 1915 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1916 #if !defined(PETSC_USE_COMPLEX) 1917 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1918 #else 1919 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1920 #endif 1921 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1922 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1923 #else /* on missing GESVD */ 1924 /* SVD */ 1925 PetscInt max_n,min_n; 1926 max_n = max_size_of_constraint; 1927 min_n = max_constraints; 1928 if (max_size_of_constraint < max_constraints) { 1929 min_n = max_size_of_constraint; 1930 max_n = max_constraints; 1931 } 1932 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1933 #if defined(PETSC_USE_COMPLEX) 1934 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1935 #endif 1936 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1937 lwork = -1; 1938 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1939 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1940 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1941 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1942 #if !defined(PETSC_USE_COMPLEX) 1943 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1944 #else 1945 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1946 #endif 1947 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1948 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1949 #endif /* on missing GESVD */ 1950 /* Allocate optimal workspace */ 1951 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1952 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1953 } 1954 /* Now we can loop on constraining sets */ 1955 total_counts = 0; 1956 temp_indices[0] = 0; 1957 /* vertices */ 1958 if (ISForVertices) { 1959 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1960 if (nnsp_has_cnst) { /* consider all vertices */ 1961 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1962 for (i=0;i<n_vertices;i++) { 1963 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1964 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1965 total_counts++; 1966 } 1967 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1968 PetscBool used_vertex; 1969 for (i=0;i<n_vertices;i++) { 1970 used_vertex = PETSC_FALSE; 1971 k = 0; 1972 while (!used_vertex && k<nnsp_size) { 1973 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1974 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1975 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1976 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1977 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1978 total_counts++; 1979 used_vertex = PETSC_TRUE; 1980 } 1981 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1982 k++; 1983 } 1984 } 1985 } 1986 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1987 n_vertices = total_counts; 1988 } 1989 1990 /* edges and faces */ 1991 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1992 if (ncc<n_ISForEdges) { 1993 used_IS = &ISForEdges[ncc]; 1994 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1995 } else { 1996 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1997 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1998 } 1999 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2000 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2001 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2002 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2003 /* change of basis should not be performed on local periodic nodes */ 2004 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2005 if (nnsp_has_cnst) { 2006 PetscScalar quad_value; 2007 temp_constraints++; 2008 if (!pcbddc->use_nnsp_true) { 2009 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2010 } else { 2011 quad_value = 1.0; 2012 } 2013 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2014 for (j=0;j<size_of_constraint;j++) { 2015 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 2016 } 2017 /* sort by global ordering if using lapack subroutines (not needed!) */ 2018 if (!skip_lapack || pcbddc->use_qr_single) { 2019 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2020 for (j=0;j<size_of_constraint;j++) { 2021 permutation[j]=j; 2022 } 2023 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2024 for (j=0;j<size_of_constraint;j++) { 2025 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 2026 } 2027 for (j=0;j<size_of_constraint;j++) { 2028 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2029 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2030 } 2031 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2032 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2033 } 2034 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2035 total_counts++; 2036 } 2037 for (k=0;k<nnsp_size;k++) { 2038 PetscReal real_value; 2039 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2040 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2041 for (j=0;j<size_of_constraint;j++) { 2042 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2043 } 2044 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2045 /* check if array is null on the connected component */ 2046 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2047 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2048 if (real_value > 0.0) { /* keep indices and values */ 2049 /* sort by global ordering if using lapack subroutines */ 2050 if (!skip_lapack || pcbddc->use_qr_single) { 2051 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2052 for (j=0;j<size_of_constraint;j++) { 2053 permutation[j]=j; 2054 } 2055 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2056 for (j=0;j<size_of_constraint;j++) { 2057 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2058 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2059 } 2060 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2061 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2062 } 2063 temp_constraints++; 2064 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2065 total_counts++; 2066 } 2067 } 2068 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2069 valid_constraints = temp_constraints; 2070 if (!pcbddc->use_nnsp_true && temp_constraints) { 2071 if (temp_constraints == 1) { /* just normalize the constraint */ 2072 PetscScalar norm; 2073 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2074 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2075 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2076 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2077 } else { /* perform SVD */ 2078 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2079 2080 #if defined(PETSC_MISSING_LAPACK_GESVD) 2081 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2082 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2083 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2084 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2085 from that computed using LAPACKgesvd 2086 -> This is due to a different computation of eigenvectors in LAPACKheev 2087 -> The quality of the POD-computed basis will be the same */ 2088 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2089 /* Store upper triangular part of correlation matrix */ 2090 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2091 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2092 for (j=0;j<temp_constraints;j++) { 2093 for (k=0;k<j+1;k++) { 2094 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 2095 } 2096 } 2097 /* compute eigenvalues and eigenvectors of correlation matrix */ 2098 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2099 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2100 #if !defined(PETSC_USE_COMPLEX) 2101 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2102 #else 2103 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2104 #endif 2105 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2106 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2107 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2108 j = 0; 2109 while (j < temp_constraints && singular_vals[j] < tol) j++; 2110 total_counts = total_counts-j; 2111 valid_constraints = temp_constraints-j; 2112 /* scale and copy POD basis into used quadrature memory */ 2113 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2114 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2115 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2116 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2117 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2118 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2119 if (j<temp_constraints) { 2120 PetscInt ii; 2121 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2122 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2123 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2124 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2125 for (k=0;k<temp_constraints-j;k++) { 2126 for (ii=0;ii<size_of_constraint;ii++) { 2127 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2128 } 2129 } 2130 } 2131 #else /* on missing GESVD */ 2132 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2133 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2134 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2135 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2136 #if !defined(PETSC_USE_COMPLEX) 2137 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 2138 #else 2139 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 2140 #endif 2141 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2142 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2143 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2144 k = temp_constraints; 2145 if (k > size_of_constraint) k = size_of_constraint; 2146 j = 0; 2147 while (j < k && singular_vals[k-j-1] < tol) j++; 2148 valid_constraints = k-j; 2149 total_counts = total_counts-temp_constraints+valid_constraints; 2150 #endif /* on missing GESVD */ 2151 } 2152 } 2153 /* setting change_of_basis flag is safe now */ 2154 if (boolforchange) { 2155 for (j=0;j<valid_constraints;j++) { 2156 PetscBTSet(change_basis,total_counts-j-1); 2157 } 2158 } 2159 } 2160 /* free workspace */ 2161 if (!skip_lapack || pcbddc->use_qr_single) { 2162 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2163 } 2164 if (!skip_lapack) { 2165 ierr = PetscFree(work);CHKERRQ(ierr); 2166 #if defined(PETSC_USE_COMPLEX) 2167 ierr = PetscFree(rwork);CHKERRQ(ierr); 2168 #endif 2169 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2170 #if defined(PETSC_MISSING_LAPACK_GESVD) 2171 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2172 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2173 #endif 2174 } 2175 for (k=0;k<nnsp_size;k++) { 2176 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2177 } 2178 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2179 } else { 2180 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2181 PetscInt cum = 0; 2182 2183 total_counts = 0; 2184 n_vertices = 0; 2185 if (sub_schurs->is_Ej_com) { 2186 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2187 } 2188 max_constraints = 0; 2189 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2190 total_counts += pcbddc->adaptive_constraints_n[i]; 2191 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2192 } 2193 2194 temp_indices = pcbddc->adaptive_constraints_ptrs; 2195 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2196 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2197 2198 #if 0 2199 printf("Found %d totals\n",total_counts); 2200 for (i=0;i<total_counts;i++) { 2201 printf("const %d, start %d",i,temp_indices[i]); 2202 printf(" end %d:\n",temp_indices[i+1]); 2203 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2204 printf(" idxs %d",temp_indices_to_constraint[j]); 2205 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2206 } 2207 } 2208 #endif 2209 2210 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2211 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2212 /* Change of basis */ 2213 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2214 cum = 0; 2215 if (pcbddc->use_change_of_basis) { 2216 for (i=0;i<sub_schurs->n_subs;i++) { 2217 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2218 for (j=0;j<pcbddc->adaptive_constraints_n[i];j++) { 2219 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2220 } 2221 } 2222 cum += pcbddc->adaptive_constraints_n[i]; 2223 } 2224 } 2225 } 2226 2227 /* free index sets of faces, edges and vertices */ 2228 for (i=0;i<n_ISForFaces;i++) { 2229 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2230 } 2231 if (n_ISForFaces) { 2232 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2233 } 2234 for (i=0;i<n_ISForEdges;i++) { 2235 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2236 } 2237 if (n_ISForEdges) { 2238 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2239 } 2240 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2241 2242 /* map temp_indices_to_constraint in boundary numbering */ 2243 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2244 if (i != temp_indices[total_counts]) { 2245 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2246 } 2247 2248 /* set quantities in pcbddc data structure and store previous primal size */ 2249 /* n_vertices defines the number of subdomain corners in the primal space */ 2250 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2251 olocal_primal_size = pcbddc->local_primal_size; 2252 pcbddc->local_primal_size = total_counts; 2253 pcbddc->n_vertices = n_vertices; 2254 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2255 2256 /* Create constraint matrix */ 2257 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2258 /* so we need to set it up properly either with or without change of basis */ 2259 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2260 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2261 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2262 /* array to compute a local numbering of constraints : vertices first then constraints */ 2263 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2264 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2265 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2266 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2267 /* auxiliary stuff for basis change */ 2268 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2269 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2270 2271 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2272 total_primal_vertices=0; 2273 for (i=0;i<pcbddc->local_primal_size;i++) { 2274 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2275 if (size_of_constraint == 1) { 2276 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2277 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2278 aux_primal_minloc[total_primal_vertices]=0; 2279 total_primal_vertices++; 2280 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2281 PetscInt min_loc,min_index; 2282 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2283 /* find first untouched local node */ 2284 k = 0; 2285 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2286 min_index = global_indices[k]; 2287 min_loc = k; 2288 /* search the minimum among global nodes already untouched on the cc */ 2289 for (k=1;k<size_of_constraint;k++) { 2290 /* there can be more than one constraint on a single connected component */ 2291 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2292 min_index = global_indices[k]; 2293 min_loc = k; 2294 } 2295 } 2296 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2297 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2298 aux_primal_minloc[total_primal_vertices]=min_loc; 2299 total_primal_vertices++; 2300 } 2301 } 2302 /* determine if a QR strategy is needed for change of basis */ 2303 qr_needed = PETSC_FALSE; 2304 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2305 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2306 if (PetscBTLookup(change_basis,i)) { 2307 if (!pcbddc->use_qr_single) { 2308 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2309 j = 0; 2310 for (k=0;k<size_of_constraint;k++) { 2311 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2312 j++; 2313 } 2314 } 2315 /* found more than one primal dof on the cc */ 2316 if (j > 1) { 2317 PetscBTSet(qr_needed_idx,i); 2318 qr_needed = PETSC_TRUE; 2319 } 2320 } else { 2321 PetscBTSet(qr_needed_idx,i); 2322 qr_needed = PETSC_TRUE; 2323 } 2324 } 2325 } 2326 /* free workspace */ 2327 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2328 2329 /* permute indices in order to have a sorted set of vertices */ 2330 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2331 2332 /* nonzero structure of constraint matrix */ 2333 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2334 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2335 j=total_primal_vertices; 2336 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2337 if (!PetscBTLookup(change_basis,i)) { 2338 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2339 j++; 2340 } 2341 } 2342 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2343 ierr = PetscFree(nnz);CHKERRQ(ierr); 2344 /* set values in constraint matrix */ 2345 for (i=0;i<total_primal_vertices;i++) { 2346 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2347 } 2348 total_counts = total_primal_vertices; 2349 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2350 if (!PetscBTLookup(change_basis,i)) { 2351 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2352 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2353 total_counts++; 2354 } 2355 } 2356 /* assembling */ 2357 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2358 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2359 /* 2360 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2361 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2362 */ 2363 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2364 if (pcbddc->use_change_of_basis) { 2365 /* dual and primal dofs on a single cc */ 2366 PetscInt dual_dofs,primal_dofs; 2367 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2368 PetscInt primal_counter; 2369 /* working stuff for GEQRF */ 2370 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2371 PetscBLASInt lqr_work; 2372 /* working stuff for UNGQR */ 2373 PetscScalar *gqr_work,lgqr_work_t; 2374 PetscBLASInt lgqr_work; 2375 /* working stuff for TRTRS */ 2376 PetscScalar *trs_rhs; 2377 PetscBLASInt Blas_NRHS; 2378 /* pointers for values insertion into change of basis matrix */ 2379 PetscInt *start_rows,*start_cols; 2380 PetscScalar *start_vals; 2381 /* working stuff for values insertion */ 2382 PetscBT is_primal; 2383 /* matrix sizes */ 2384 PetscInt global_size,local_size; 2385 /* work array for nonzeros */ 2386 PetscScalar *nnz_array; 2387 /* temporary change of basis */ 2388 Mat localChangeOfBasisMatrix; 2389 /* auxiliary work for global change of basis */ 2390 Vec nnz_vec; 2391 PetscInt *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz; 2392 PetscInt nvtxs,*xadj,*adjncy,*idxs_mapped; 2393 PetscScalar *vals; 2394 PetscBool done; 2395 2396 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2397 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2398 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2399 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2400 2401 /* nonzeros for local mat */ 2402 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 2403 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 2404 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2405 if (PetscBTLookup(change_basis,i)) { 2406 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2407 if (PetscBTLookup(qr_needed_idx,i)) { 2408 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2409 } else { 2410 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 2411 /* get local primal index on the cc */ 2412 j = 0; 2413 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2414 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2415 } 2416 } 2417 } 2418 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2419 /* Set initial identity in the matrix */ 2420 for (i=0;i<pcis->n_B;i++) { 2421 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2422 } 2423 2424 if (pcbddc->dbg_flag) { 2425 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2426 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2427 } 2428 2429 2430 /* Now we loop on the constraints which need a change of basis */ 2431 /* 2432 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2433 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2434 2435 Basic blocks of change of basis matrix T computed by 2436 2437 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2438 2439 | 1 0 ... 0 s_1/S | 2440 | 0 1 ... 0 s_2/S | 2441 | ... | 2442 | 0 ... 1 s_{n-1}/S | 2443 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2444 2445 with S = \sum_{i=1}^n s_i^2 2446 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2447 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2448 2449 - QR decomposition of constraints otherwise 2450 */ 2451 if (qr_needed) { 2452 /* space to store Q */ 2453 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2454 /* first we issue queries for optimal work */ 2455 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2456 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2457 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2458 lqr_work = -1; 2459 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2460 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2461 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2462 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2463 lgqr_work = -1; 2464 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2465 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2466 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2467 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2468 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2469 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2470 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2471 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2472 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2473 /* array to store scaling factors for reflectors */ 2474 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2475 /* array to store rhs and solution of triangular solver */ 2476 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2477 /* allocating workspace for check */ 2478 if (pcbddc->dbg_flag) { 2479 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2480 } 2481 } 2482 /* array to store whether a node is primal or not */ 2483 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2484 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2485 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2486 if (i != total_primal_vertices) { 2487 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2488 } 2489 for (i=0;i<total_primal_vertices;i++) { 2490 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2491 } 2492 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2493 2494 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2495 /* -> using implicit ordering contained in temp_indices data */ 2496 total_counts = pcbddc->n_vertices; 2497 primal_counter = total_counts; 2498 while (total_counts<pcbddc->local_primal_size) { 2499 primal_dofs = 1; 2500 if (PetscBTLookup(change_basis,total_counts)) { 2501 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2502 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 2503 primal_dofs++; 2504 } 2505 /* get constraint info */ 2506 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2507 dual_dofs = size_of_constraint-primal_dofs; 2508 2509 if (pcbddc->dbg_flag) { 2510 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2511 } 2512 2513 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2514 2515 /* copy quadrature constraints for change of basis check */ 2516 if (pcbddc->dbg_flag) { 2517 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2518 } 2519 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2520 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2521 2522 /* compute QR decomposition of constraints */ 2523 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2524 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2525 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2526 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2527 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2528 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2529 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2530 2531 /* explictly compute R^-T */ 2532 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2533 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2534 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2535 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2537 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2538 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2539 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2540 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2541 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2542 2543 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2544 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2545 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2546 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2547 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2548 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2549 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2550 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2551 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2552 2553 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2554 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2555 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2556 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2557 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2558 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2559 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2560 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2561 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2562 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2563 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2564 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2565 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2566 2567 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2568 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2569 /* insert cols for primal dofs */ 2570 for (j=0;j<primal_dofs;j++) { 2571 start_vals = &qr_basis[j*size_of_constraint]; 2572 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2573 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2574 } 2575 /* insert cols for dual dofs */ 2576 for (j=0,k=0;j<dual_dofs;k++) { 2577 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2578 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2579 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2580 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2581 j++; 2582 } 2583 } 2584 2585 /* check change of basis */ 2586 if (pcbddc->dbg_flag) { 2587 PetscInt ii,jj; 2588 PetscBool valid_qr=PETSC_TRUE; 2589 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2590 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2591 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2592 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2593 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2594 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2595 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2596 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2597 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2598 for (jj=0;jj<size_of_constraint;jj++) { 2599 for (ii=0;ii<primal_dofs;ii++) { 2600 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2601 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2602 } 2603 } 2604 if (!valid_qr) { 2605 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2606 for (jj=0;jj<size_of_constraint;jj++) { 2607 for (ii=0;ii<primal_dofs;ii++) { 2608 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2609 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2610 } 2611 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2612 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2613 } 2614 } 2615 } 2616 } else { 2617 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2618 } 2619 } 2620 } else { /* simple transformation block */ 2621 PetscInt row,col; 2622 PetscScalar val,norm; 2623 2624 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2625 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2626 for (j=0;j<size_of_constraint;j++) { 2627 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2628 if (!PetscBTLookup(is_primal,row)) { 2629 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2630 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2631 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2632 } else { 2633 for (k=0;k<size_of_constraint;k++) { 2634 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2635 if (row != col) { 2636 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2637 } else { 2638 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2639 } 2640 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2641 } 2642 } 2643 } 2644 if (pcbddc->dbg_flag) { 2645 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2646 } 2647 } 2648 /* increment primal counter */ 2649 primal_counter += primal_dofs; 2650 } else { 2651 if (pcbddc->dbg_flag) { 2652 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2653 } 2654 } 2655 /* increment constraint counter total_counts */ 2656 total_counts += primal_dofs; 2657 } 2658 2659 /* free workspace */ 2660 if (qr_needed) { 2661 if (pcbddc->dbg_flag) { 2662 ierr = PetscFree(work);CHKERRQ(ierr); 2663 } 2664 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2665 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2666 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2667 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2668 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2669 } 2670 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2671 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2672 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2673 2674 /* assembling of global change of variable */ 2675 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2676 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2677 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2678 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2679 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2680 ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr); 2681 2682 /* nonzeros (overestimated) */ 2683 ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr); 2684 ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr); 2685 ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr); 2686 for (i=0;i<pcis->n;i++) { 2687 nnz_array[i] = 1.0; 2688 idxs_all[i] = i; 2689 } 2690 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2691 for (i=0;i<pcis->n_B;i++) { 2692 nnz_array[idxs_B[i]] = nnz[i]; 2693 } 2694 if (pcis->n) { 2695 ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr); 2696 } 2697 ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr); 2698 ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr); 2699 ierr = PetscFree(nnz);CHKERRQ(ierr); 2700 ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr); 2701 ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr); 2702 ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2703 for (i=0;i<local_size;i++) { 2704 d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size); 2705 o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size); 2706 } 2707 ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2708 ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr); 2709 ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2710 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2711 2712 /* Set identity on dirichlet dofs */ 2713 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2714 for (i=0;i<pcis->n-pcis->n_B;i++) { 2715 PetscScalar one=1.0; 2716 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr); 2717 } 2718 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2719 2720 /* Set values at interface dofs */ 2721 done = PETSC_TRUE; 2722 ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2723 if (!done) { 2724 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2725 } 2726 ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2727 ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr); 2728 ierr = ISLocalToGlobalMappingApply(pcis->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr); 2729 for (i=0;i<nvtxs;i++) { 2730 PetscInt row,*cols,ncols; 2731 PetscScalar *mat_vals; 2732 2733 row = idxs_B[i]; 2734 ncols = xadj[i+1]-xadj[i]; 2735 cols = idxs_mapped+xadj[i]; 2736 mat_vals = vals+xadj[i]; 2737 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr); 2738 } 2739 ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2740 if (!done) { 2741 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2742 } 2743 ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2744 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2745 ierr = PetscFree(idxs_mapped);CHKERRQ(ierr); 2746 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2747 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2748 2749 /* check */ 2750 if (pcbddc->dbg_flag) { 2751 PetscReal error; 2752 Vec x,x_change; 2753 2754 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2755 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2756 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2757 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2758 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2759 ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2760 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 2761 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2762 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2763 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2764 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2765 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2766 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2767 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr); 2768 ierr = VecDestroy(&x);CHKERRQ(ierr); 2769 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2770 } 2771 2772 /* adapt sub_schurs computed (if any) */ 2773 if (pcbddc->use_deluxe_scaling) { 2774 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2775 if (sub_schurs->n_subs_par_g) { 2776 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented"); 2777 } 2778 if (sub_schurs->S_Ej_all) { 2779 Mat S_1,S_2,tmat; 2780 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,sub_schurs->is_Ej_all,sub_schurs->is_Ej_all,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2781 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2782 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2783 sub_schurs->S_Ej_all = S_1; 2784 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2785 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2786 sub_schurs->sum_S_Ej_all = S_2; 2787 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2788 } 2789 } 2790 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2791 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2792 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2793 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2794 } 2795 2796 /* set up change of basis context */ 2797 if (pcbddc->ChangeOfBasisMatrix) { 2798 PCBDDCChange_ctx change_ctx; 2799 2800 if (!pcbddc->new_global_mat) { 2801 PetscInt global_size,local_size; 2802 2803 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2804 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2805 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2806 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2807 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2808 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2809 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2810 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2811 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2812 } else { 2813 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2814 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2815 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2816 } 2817 if (!pcbddc->user_ChangeOfBasisMatrix) { 2818 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2819 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2820 } else { 2821 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2822 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2823 } 2824 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2825 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2826 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2827 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2828 } 2829 2830 /* get indices in local ordering for vertices and constraints */ 2831 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2832 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2833 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2834 } 2835 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2836 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2837 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2838 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2839 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2840 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2841 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2842 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2843 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2844 /* set quantities in PCBDDC data struct */ 2845 pcbddc->n_actual_vertices = i; 2846 /* check if a new primal space has been introduced */ 2847 pcbddc->new_primal_space_local = PETSC_TRUE; 2848 if (olocal_primal_size == pcbddc->local_primal_size) { 2849 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2850 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2851 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2852 } 2853 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2854 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2855 2856 /* flush dbg viewer */ 2857 if (pcbddc->dbg_flag) { 2858 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2859 } 2860 2861 /* free workspace */ 2862 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2863 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2864 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2865 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2866 if (!pcbddc->adaptive_selection) { 2867 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2868 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2869 } else { 2870 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2871 pcbddc->adaptive_constraints_ptrs, 2872 pcbddc->adaptive_constraints_idxs, 2873 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2874 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2875 } 2876 PetscFunctionReturn(0); 2877 } 2878 2879 #undef __FUNCT__ 2880 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2881 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2882 { 2883 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2884 PC_IS *pcis = (PC_IS*)pc->data; 2885 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2886 PetscInt ierr,i,vertex_size; 2887 PetscViewer viewer=pcbddc->dbg_viewer; 2888 2889 PetscFunctionBegin; 2890 /* Reset previously computed graph */ 2891 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2892 /* Init local Graph struct */ 2893 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2894 2895 /* Check validity of the csr graph passed in by the user */ 2896 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2897 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2898 } 2899 2900 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2901 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2902 Mat mat_adj; 2903 PetscInt *xadj,*adjncy; 2904 PetscInt nvtxs; 2905 PetscBool flg_row=PETSC_TRUE; 2906 2907 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2908 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2909 if (!flg_row) { 2910 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2911 } 2912 if (pcbddc->use_local_adj) { 2913 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2914 pcbddc->computed_rowadj = PETSC_TRUE; 2915 } else { /* just compute subdomain's connected components */ 2916 IS is_dummy; 2917 ISLocalToGlobalMapping l2gmap_dummy; 2918 PetscInt j,sum; 2919 PetscInt *cxadj,*cadjncy; 2920 const PetscInt *idxs; 2921 PCBDDCGraph graph; 2922 PetscBT is_on_boundary; 2923 2924 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2925 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2926 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2927 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2928 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2929 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2930 graph->xadj = xadj; 2931 graph->adjncy = adjncy; 2932 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2933 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2934 2935 if (pcbddc->dbg_flag) { 2936 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2937 for (i=0;i<graph->ncc;i++) { 2938 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2939 } 2940 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2941 } 2942 2943 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2944 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2945 for (i=0;i<pcis->n_B;i++) { 2946 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2947 } 2948 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2949 2950 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2951 sum = 0; 2952 for (i=0;i<graph->ncc;i++) { 2953 PetscInt sizecc = 0; 2954 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2955 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2956 sizecc++; 2957 } 2958 } 2959 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2960 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2961 cxadj[graph->queue[j]] = sizecc; 2962 } 2963 } 2964 sum += sizecc*sizecc; 2965 } 2966 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2967 sum = 0; 2968 for (i=0;i<nvtxs;i++) { 2969 PetscInt temp = cxadj[i]; 2970 cxadj[i] = sum; 2971 sum += temp; 2972 } 2973 cxadj[nvtxs] = sum; 2974 for (i=0;i<graph->ncc;i++) { 2975 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2976 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2977 PetscInt k,sizecc = 0; 2978 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2979 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2980 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2981 sizecc++; 2982 } 2983 } 2984 } 2985 } 2986 } 2987 if (nvtxs) { 2988 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2989 } else { 2990 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2991 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2992 } 2993 graph->xadj = 0; 2994 graph->adjncy = 0; 2995 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2996 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2997 } 2998 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2999 if (!flg_row) { 3000 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 3001 } 3002 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 3003 } 3004 3005 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3006 vertex_size = 1; 3007 if (pcbddc->user_provided_isfordofs) { 3008 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3009 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3010 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3011 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3012 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3013 } 3014 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3015 pcbddc->n_ISForDofs = 0; 3016 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3017 } 3018 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3019 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3020 } else { 3021 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3022 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3023 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3024 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3025 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3026 } 3027 } 3028 } 3029 3030 /* Setup of Graph */ 3031 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3032 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3033 } 3034 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3035 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3036 } 3037 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 3038 3039 /* Graph's connected components analysis */ 3040 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3041 3042 /* print some info to stdout */ 3043 if (pcbddc->dbg_flag) { 3044 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 3045 } 3046 3047 /* mark topography has done */ 3048 pcbddc->recompute_topography = PETSC_FALSE; 3049 PetscFunctionReturn(0); 3050 } 3051 3052 #undef __FUNCT__ 3053 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 3054 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 3055 { 3056 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3057 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 3058 PetscErrorCode ierr; 3059 3060 PetscFunctionBegin; 3061 n = 0; 3062 vertices = 0; 3063 if (pcbddc->ConstraintMatrix) { 3064 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 3065 for (i=0;i<local_primal_size;i++) { 3066 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3067 if (size_of_constraint == 1) n++; 3068 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3069 } 3070 if (vertices_idx) { 3071 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 3072 n = 0; 3073 for (i=0;i<local_primal_size;i++) { 3074 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3075 if (size_of_constraint == 1) { 3076 vertices[n++]=row_cmat_indices[0]; 3077 } 3078 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3079 } 3080 } 3081 } 3082 *n_vertices = n; 3083 if (vertices_idx) *vertices_idx = vertices; 3084 PetscFunctionReturn(0); 3085 } 3086 3087 #undef __FUNCT__ 3088 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 3089 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 3090 { 3091 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3092 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 3093 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 3094 PetscBT touched; 3095 PetscErrorCode ierr; 3096 3097 /* This function assumes that the number of local constraints per connected component 3098 is not greater than the number of nodes defined for the connected component 3099 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3100 PetscFunctionBegin; 3101 n = 0; 3102 constraints_index = 0; 3103 if (pcbddc->ConstraintMatrix) { 3104 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3105 max_size_of_constraint = 0; 3106 for (i=0;i<local_primal_size;i++) { 3107 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3108 if (size_of_constraint > 1) { 3109 n++; 3110 } 3111 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3112 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3113 } 3114 if (constraints_idx) { 3115 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3116 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3117 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3118 n = 0; 3119 for (i=0;i<local_primal_size;i++) { 3120 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3121 if (size_of_constraint > 1) { 3122 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3123 /* find first untouched local node */ 3124 j = 0; 3125 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3126 min_index = row_cmat_global_indices[j]; 3127 min_loc = j; 3128 /* search the minimum among nodes not yet touched on the connected component 3129 since there can be more than one constraint on a single cc */ 3130 for (j=1;j<size_of_constraint;j++) { 3131 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3132 min_index = row_cmat_global_indices[j]; 3133 min_loc = j; 3134 } 3135 } 3136 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3137 constraints_index[n++] = row_cmat_indices[min_loc]; 3138 } 3139 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3140 } 3141 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3142 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3143 } 3144 } 3145 *n_constraints = n; 3146 if (constraints_idx) *constraints_idx = constraints_index; 3147 PetscFunctionReturn(0); 3148 } 3149 3150 #undef __FUNCT__ 3151 #define __FUNCT__ "PCBDDCSubsetNumbering" 3152 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3153 { 3154 Vec local_vec,global_vec; 3155 IS seqis,paris; 3156 VecScatter scatter_ctx; 3157 PetscScalar *array; 3158 PetscInt *temp_global_dofs; 3159 PetscScalar globalsum; 3160 PetscInt i,j,s; 3161 PetscInt nlocals,first_index,old_index,max_local; 3162 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 3163 PetscMPIInt *dof_sizes,*dof_displs; 3164 PetscBool first_found; 3165 PetscErrorCode ierr; 3166 3167 PetscFunctionBegin; 3168 /* mpi buffers */ 3169 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3170 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3171 j = ( !rank_prec_comm ? size_prec_comm : 0); 3172 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 3173 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 3174 /* get maximum size of subset */ 3175 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3176 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3177 max_local = 0; 3178 for (i=0;i<n_local_dofs;i++) { 3179 if (max_local < temp_global_dofs[i] ) { 3180 max_local = temp_global_dofs[i]; 3181 } 3182 } 3183 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3184 max_global++; 3185 max_local = 0; 3186 for (i=0;i<n_local_dofs;i++) { 3187 if (max_local < local_dofs[i] ) { 3188 max_local = local_dofs[i]; 3189 } 3190 } 3191 max_local++; 3192 /* allocate workspace */ 3193 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3194 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3195 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3196 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3197 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3198 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3199 /* create scatter */ 3200 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3201 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3202 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3203 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3204 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3205 /* init array */ 3206 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3207 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3208 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3209 if (local_dofs_mult) { 3210 for (i=0;i<n_local_dofs;i++) { 3211 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3212 } 3213 } else { 3214 for (i=0;i<n_local_dofs;i++) { 3215 array[local_dofs[i]]=1.0; 3216 } 3217 } 3218 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3219 /* scatter into global vec and get total number of global dofs */ 3220 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3221 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3222 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3223 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3224 /* Fill global_vec with cumulative function for global numbering */ 3225 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3226 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3227 nlocals = 0; 3228 first_index = -1; 3229 first_found = PETSC_FALSE; 3230 for (i=0;i<s;i++) { 3231 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3232 first_found = PETSC_TRUE; 3233 first_index = i; 3234 } 3235 nlocals += (PetscInt)PetscRealPart(array[i]); 3236 } 3237 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3238 if (!rank_prec_comm) { 3239 dof_displs[0]=0; 3240 for (i=1;i<size_prec_comm;i++) { 3241 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3242 } 3243 } 3244 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3245 if (first_found) { 3246 array[first_index] += (PetscScalar)nlocals; 3247 old_index = first_index; 3248 for (i=first_index+1;i<s;i++) { 3249 if (PetscRealPart(array[i]) > 0.1) { 3250 array[i] += array[old_index]; 3251 old_index = i; 3252 } 3253 } 3254 } 3255 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3256 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3257 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3258 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3259 /* get global ordering of local dofs */ 3260 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3261 if (local_dofs_mult) { 3262 for (i=0;i<n_local_dofs;i++) { 3263 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3264 } 3265 } else { 3266 for (i=0;i<n_local_dofs;i++) { 3267 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3268 } 3269 } 3270 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3271 /* free workspace */ 3272 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3273 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3274 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3275 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3276 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3277 /* return pointer to global ordering of local dofs */ 3278 *global_numbering_subset = temp_global_dofs; 3279 PetscFunctionReturn(0); 3280 } 3281 3282 #undef __FUNCT__ 3283 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3284 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3285 { 3286 PetscInt i,j; 3287 PetscScalar *alphas; 3288 PetscErrorCode ierr; 3289 3290 PetscFunctionBegin; 3291 /* this implements stabilized Gram-Schmidt */ 3292 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3293 for (i=0;i<n;i++) { 3294 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3295 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3296 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3297 } 3298 ierr = PetscFree(alphas);CHKERRQ(ierr); 3299 PetscFunctionReturn(0); 3300 } 3301 3302 #undef __FUNCT__ 3303 #define __FUNCT__ "MatISGetSubassemblingPattern" 3304 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3305 { 3306 Mat subdomain_adj; 3307 IS new_ranks,ranks_send_to; 3308 MatPartitioning partitioner; 3309 Mat_IS *matis; 3310 PetscInt n_neighs,*neighs,*n_shared,**shared; 3311 PetscInt prank; 3312 PetscMPIInt size,rank,color; 3313 PetscInt *xadj,*adjncy,*oldranks; 3314 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3315 PetscInt i,local_size,threshold=0; 3316 PetscErrorCode ierr; 3317 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3318 PetscSubcomm subcomm; 3319 3320 PetscFunctionBegin; 3321 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3322 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3323 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3324 3325 /* Get info on mapping */ 3326 matis = (Mat_IS*)(mat->data); 3327 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3328 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3329 3330 /* build local CSR graph of subdomains' connectivity */ 3331 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3332 xadj[0] = 0; 3333 xadj[1] = PetscMax(n_neighs-1,0); 3334 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3335 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3336 3337 if (threshold) { 3338 PetscInt xadj_count = 0; 3339 for (i=1;i<n_neighs;i++) { 3340 if (n_shared[i] > threshold) { 3341 adjncy[xadj_count] = neighs[i]; 3342 adjncy_wgt[xadj_count] = n_shared[i]; 3343 xadj_count++; 3344 } 3345 } 3346 xadj[1] = xadj_count; 3347 } else { 3348 if (xadj[1]) { 3349 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3350 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3351 } 3352 } 3353 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3354 if (use_square) { 3355 for (i=0;i<xadj[1];i++) { 3356 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3357 } 3358 } 3359 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3360 3361 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3362 3363 /* 3364 Restrict work on active processes only. 3365 */ 3366 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3367 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3368 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3369 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3370 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3371 if (color) { 3372 ierr = PetscFree(xadj);CHKERRQ(ierr); 3373 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3374 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3375 } else { 3376 PetscInt coarsening_ratio; 3377 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 3378 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3379 prank = rank; 3380 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 3381 /* 3382 for (i=0;i<size;i++) { 3383 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3384 } 3385 */ 3386 for (i=0;i<xadj[1];i++) { 3387 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3388 } 3389 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3390 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3391 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3392 3393 /* Partition */ 3394 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 3395 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3396 if (use_vwgt) { 3397 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3398 v_wgt[0] = local_size; 3399 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3400 } 3401 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3402 coarsening_ratio = size/n_subdomains; 3403 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3404 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3405 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3406 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3407 3408 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3409 if (contiguous) { 3410 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3411 } else { 3412 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3413 } 3414 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3415 /* clean up */ 3416 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3417 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3418 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3419 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3420 } 3421 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3422 3423 /* assemble parallel IS for sends */ 3424 i = 1; 3425 if (color) i=0; 3426 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3427 3428 /* get back IS */ 3429 *is_sends = ranks_send_to; 3430 PetscFunctionReturn(0); 3431 } 3432 3433 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3434 3435 #undef __FUNCT__ 3436 #define __FUNCT__ "MatISSubassemble" 3437 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3438 { 3439 Mat local_mat; 3440 Mat_IS *matis; 3441 IS is_sends_internal; 3442 PetscInt rows,cols,new_local_rows; 3443 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3444 PetscBool ismatis,isdense,newisdense,destroy_mat; 3445 ISLocalToGlobalMapping l2gmap; 3446 PetscInt* l2gmap_indices; 3447 const PetscInt* is_indices; 3448 MatType new_local_type; 3449 /* buffers */ 3450 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3451 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3452 PetscInt *recv_buffer_idxs_local; 3453 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3454 /* MPI */ 3455 MPI_Comm comm,comm_n; 3456 PetscSubcomm subcomm; 3457 PetscMPIInt n_sends,n_recvs,commsize; 3458 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3459 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3460 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3461 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3462 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3463 PetscErrorCode ierr; 3464 3465 PetscFunctionBegin; 3466 /* TODO: add missing checks */ 3467 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3468 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3469 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3470 PetscValidLogicalCollectiveInt(mat,nis,7); 3471 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3472 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3473 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3474 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3475 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3476 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3477 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3478 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3479 PetscInt mrows,mcols,mnrows,mncols; 3480 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3481 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3482 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3483 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3484 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3485 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3486 } 3487 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3488 PetscValidLogicalCollectiveInt(mat,bs,0); 3489 /* prepare IS for sending if not provided */ 3490 if (!is_sends) { 3491 PetscBool pcontig = PETSC_TRUE; 3492 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3493 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3494 } else { 3495 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3496 is_sends_internal = is_sends; 3497 } 3498 3499 /* get pointer of MATIS data */ 3500 matis = (Mat_IS*)mat->data; 3501 3502 /* get comm */ 3503 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3504 3505 /* compute number of sends */ 3506 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3507 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3508 3509 /* compute number of receives */ 3510 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3511 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3512 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3513 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3514 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3515 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3516 ierr = PetscFree(iflags);CHKERRQ(ierr); 3517 3518 /* restrict comm if requested */ 3519 subcomm = 0; 3520 destroy_mat = PETSC_FALSE; 3521 if (restrict_comm) { 3522 PetscMPIInt color,rank,subcommsize; 3523 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3524 color = 0; 3525 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3526 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3527 subcommsize = commsize - subcommsize; 3528 /* check if reuse has been requested */ 3529 if (reuse == MAT_REUSE_MATRIX) { 3530 if (*mat_n) { 3531 PetscMPIInt subcommsize2; 3532 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3533 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3534 comm_n = PetscObjectComm((PetscObject)*mat_n); 3535 } else { 3536 comm_n = PETSC_COMM_SELF; 3537 } 3538 } else { /* MAT_INITIAL_MATRIX */ 3539 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3540 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3541 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3542 comm_n = subcomm->comm; 3543 } 3544 /* flag to destroy *mat_n if not significative */ 3545 if (color) destroy_mat = PETSC_TRUE; 3546 } else { 3547 comm_n = comm; 3548 } 3549 3550 /* prepare send/receive buffers */ 3551 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3552 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3553 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3554 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3555 if (nis) { 3556 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3557 } 3558 3559 /* Get data from local matrices */ 3560 if (!isdense) { 3561 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3562 /* TODO: See below some guidelines on how to prepare the local buffers */ 3563 /* 3564 send_buffer_vals should contain the raw values of the local matrix 3565 send_buffer_idxs should contain: 3566 - MatType_PRIVATE type 3567 - PetscInt size_of_l2gmap 3568 - PetscInt global_row_indices[size_of_l2gmap] 3569 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3570 */ 3571 } else { 3572 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3573 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3574 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3575 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3576 send_buffer_idxs[1] = i; 3577 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3578 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3579 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3580 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3581 for (i=0;i<n_sends;i++) { 3582 ilengths_vals[is_indices[i]] = len*len; 3583 ilengths_idxs[is_indices[i]] = len+2; 3584 } 3585 } 3586 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3587 /* additional is (if any) */ 3588 if (nis) { 3589 PetscMPIInt psum; 3590 PetscInt j; 3591 for (j=0,psum=0;j<nis;j++) { 3592 PetscInt plen; 3593 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3594 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3595 psum += len+1; /* indices + lenght */ 3596 } 3597 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3598 for (j=0,psum=0;j<nis;j++) { 3599 PetscInt plen; 3600 const PetscInt *is_array_idxs; 3601 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3602 send_buffer_idxs_is[psum] = plen; 3603 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3604 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3605 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3606 psum += plen+1; /* indices + lenght */ 3607 } 3608 for (i=0;i<n_sends;i++) { 3609 ilengths_idxs_is[is_indices[i]] = psum; 3610 } 3611 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3612 } 3613 3614 buf_size_idxs = 0; 3615 buf_size_vals = 0; 3616 buf_size_idxs_is = 0; 3617 for (i=0;i<n_recvs;i++) { 3618 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3619 buf_size_vals += (PetscInt)olengths_vals[i]; 3620 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3621 } 3622 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3623 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3624 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3625 3626 /* get new tags for clean communications */ 3627 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3628 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3629 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3630 3631 /* allocate for requests */ 3632 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3633 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3634 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3635 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3636 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3637 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3638 3639 /* communications */ 3640 ptr_idxs = recv_buffer_idxs; 3641 ptr_vals = recv_buffer_vals; 3642 ptr_idxs_is = recv_buffer_idxs_is; 3643 for (i=0;i<n_recvs;i++) { 3644 source_dest = onodes[i]; 3645 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3646 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3647 ptr_idxs += olengths_idxs[i]; 3648 ptr_vals += olengths_vals[i]; 3649 if (nis) { 3650 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3651 ptr_idxs_is += olengths_idxs_is[i]; 3652 } 3653 } 3654 for (i=0;i<n_sends;i++) { 3655 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3656 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3657 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3658 if (nis) { 3659 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3660 } 3661 } 3662 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3663 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3664 3665 /* assemble new l2g map */ 3666 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3667 ptr_idxs = recv_buffer_idxs; 3668 new_local_rows = 0; 3669 for (i=0;i<n_recvs;i++) { 3670 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3671 ptr_idxs += olengths_idxs[i]; 3672 } 3673 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3674 ptr_idxs = recv_buffer_idxs; 3675 new_local_rows = 0; 3676 for (i=0;i<n_recvs;i++) { 3677 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3678 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3679 ptr_idxs += olengths_idxs[i]; 3680 } 3681 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3682 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3683 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3684 3685 /* infer new local matrix type from received local matrices type */ 3686 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3687 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3688 if (n_recvs) { 3689 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3690 ptr_idxs = recv_buffer_idxs; 3691 for (i=0;i<n_recvs;i++) { 3692 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3693 new_local_type_private = MATAIJ_PRIVATE; 3694 break; 3695 } 3696 ptr_idxs += olengths_idxs[i]; 3697 } 3698 switch (new_local_type_private) { 3699 case MATDENSE_PRIVATE: 3700 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3701 new_local_type = MATSEQAIJ; 3702 bs = 1; 3703 } else { /* if I receive only 1 dense matrix */ 3704 new_local_type = MATSEQDENSE; 3705 bs = 1; 3706 } 3707 break; 3708 case MATAIJ_PRIVATE: 3709 new_local_type = MATSEQAIJ; 3710 bs = 1; 3711 break; 3712 case MATBAIJ_PRIVATE: 3713 new_local_type = MATSEQBAIJ; 3714 break; 3715 case MATSBAIJ_PRIVATE: 3716 new_local_type = MATSEQSBAIJ; 3717 break; 3718 default: 3719 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3720 break; 3721 } 3722 } else { /* by default, new_local_type is seqdense */ 3723 new_local_type = MATSEQDENSE; 3724 bs = 1; 3725 } 3726 3727 /* create MATIS object if needed */ 3728 if (reuse == MAT_INITIAL_MATRIX) { 3729 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3730 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3731 } else { 3732 /* it also destroys the local matrices */ 3733 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3734 } 3735 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3736 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3737 3738 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3739 3740 /* Global to local map of received indices */ 3741 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3742 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3743 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3744 3745 /* restore attributes -> type of incoming data and its size */ 3746 buf_size_idxs = 0; 3747 for (i=0;i<n_recvs;i++) { 3748 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3749 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3750 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3751 } 3752 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3753 3754 /* set preallocation */ 3755 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3756 if (!newisdense) { 3757 PetscInt *new_local_nnz=0; 3758 3759 ptr_vals = recv_buffer_vals; 3760 ptr_idxs = recv_buffer_idxs_local; 3761 if (n_recvs) { 3762 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3763 } 3764 for (i=0;i<n_recvs;i++) { 3765 PetscInt j; 3766 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3767 for (j=0;j<*(ptr_idxs+1);j++) { 3768 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3769 } 3770 } else { 3771 /* TODO */ 3772 } 3773 ptr_idxs += olengths_idxs[i]; 3774 } 3775 if (new_local_nnz) { 3776 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3777 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3778 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3779 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3780 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3781 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3782 } else { 3783 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3784 } 3785 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3786 } else { 3787 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3788 } 3789 3790 /* set values */ 3791 ptr_vals = recv_buffer_vals; 3792 ptr_idxs = recv_buffer_idxs_local; 3793 for (i=0;i<n_recvs;i++) { 3794 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3795 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3796 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3797 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3798 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3799 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3800 } else { 3801 /* TODO */ 3802 } 3803 ptr_idxs += olengths_idxs[i]; 3804 ptr_vals += olengths_vals[i]; 3805 } 3806 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3807 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3808 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3809 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3810 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3811 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3812 3813 #if 0 3814 if (!restrict_comm) { /* check */ 3815 Vec lvec,rvec; 3816 PetscReal infty_error; 3817 3818 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3819 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3820 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3821 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3822 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3823 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3824 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3825 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3826 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3827 } 3828 #endif 3829 3830 /* assemble new additional is (if any) */ 3831 if (nis) { 3832 PetscInt **temp_idxs,*count_is,j,psum; 3833 3834 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3835 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3836 ptr_idxs = recv_buffer_idxs_is; 3837 psum = 0; 3838 for (i=0;i<n_recvs;i++) { 3839 for (j=0;j<nis;j++) { 3840 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3841 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3842 psum += plen; 3843 ptr_idxs += plen+1; /* shift pointer to received data */ 3844 } 3845 } 3846 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3847 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3848 for (i=1;i<nis;i++) { 3849 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3850 } 3851 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3852 ptr_idxs = recv_buffer_idxs_is; 3853 for (i=0;i<n_recvs;i++) { 3854 for (j=0;j<nis;j++) { 3855 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3856 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3857 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3858 ptr_idxs += plen+1; /* shift pointer to received data */ 3859 } 3860 } 3861 for (i=0;i<nis;i++) { 3862 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3863 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3864 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3865 } 3866 ierr = PetscFree(count_is);CHKERRQ(ierr); 3867 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3868 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3869 } 3870 /* free workspace */ 3871 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3872 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3873 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3874 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3875 if (isdense) { 3876 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3877 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3878 } else { 3879 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3880 } 3881 if (nis) { 3882 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3883 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3884 } 3885 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3886 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3887 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3888 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3889 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3890 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3891 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3892 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3893 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3894 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3895 ierr = PetscFree(onodes);CHKERRQ(ierr); 3896 if (nis) { 3897 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3898 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3899 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3900 } 3901 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3902 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3903 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3904 for (i=0;i<nis;i++) { 3905 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3906 } 3907 } 3908 PetscFunctionReturn(0); 3909 } 3910 3911 /* temporary hack into ksp private data structure */ 3912 #include <petsc-private/kspimpl.h> 3913 3914 #undef __FUNCT__ 3915 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3916 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3917 { 3918 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3919 PC_IS *pcis = (PC_IS*)pc->data; 3920 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3921 MatNullSpace CoarseNullSpace=NULL; 3922 ISLocalToGlobalMapping coarse_islg; 3923 IS coarse_is,*isarray; 3924 PetscInt i,im_active=-1,active_procs=-1; 3925 PetscInt nis,nisdofs,nisneu; 3926 PC pc_temp; 3927 PCType coarse_pc_type; 3928 KSPType coarse_ksp_type; 3929 PetscBool multilevel_requested,multilevel_allowed; 3930 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3931 Mat t_coarse_mat_is; 3932 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3933 PetscMPIInt all_procs; 3934 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3935 PetscBool compute_vecs = PETSC_FALSE; 3936 PetscScalar *array; 3937 PetscErrorCode ierr; 3938 3939 PetscFunctionBegin; 3940 /* Assign global numbering to coarse dofs */ 3941 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3942 compute_vecs = PETSC_TRUE; 3943 PetscInt ocoarse_size; 3944 ocoarse_size = pcbddc->coarse_size; 3945 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3946 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3947 /* see if we can avoid some work */ 3948 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3949 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3950 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3951 coarse_reuse = PETSC_FALSE; 3952 } else { /* we can safely reuse already computed coarse matrix */ 3953 coarse_reuse = PETSC_TRUE; 3954 } 3955 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3956 coarse_reuse = PETSC_FALSE; 3957 } 3958 /* reset any subassembling information */ 3959 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3960 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3961 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3962 coarse_reuse = PETSC_TRUE; 3963 } 3964 3965 /* count "active" (i.e. with positive local size) and "void" processes */ 3966 im_active = !!(pcis->n); 3967 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3968 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3969 void_procs = all_procs-active_procs; 3970 csin_type_simple = PETSC_TRUE; 3971 redist = PETSC_FALSE; 3972 if (pcbddc->current_level && void_procs) { 3973 csin_ml = PETSC_TRUE; 3974 ncoarse_ml = void_procs; 3975 csin_ds = PETSC_TRUE; 3976 ncoarse_ds = void_procs; 3977 } else { 3978 csin_ml = PETSC_FALSE; 3979 ncoarse_ml = all_procs; 3980 if (void_procs) { 3981 csin_ds = PETSC_TRUE; 3982 ncoarse_ds = void_procs; 3983 csin_type_simple = PETSC_FALSE; 3984 } else { 3985 if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) { 3986 csin_ds = PETSC_TRUE; 3987 ncoarse_ds = pcbddc->redistribute_coarse; 3988 redist = PETSC_TRUE; 3989 } else { 3990 csin_ds = PETSC_FALSE; 3991 ncoarse_ds = all_procs; 3992 } 3993 } 3994 } 3995 3996 /* 3997 test if we can go multilevel: three conditions must be satisfied: 3998 - we have not exceeded the number of levels requested 3999 - we can actually subassemble the active processes 4000 - we can find a suitable number of MPI processes where we can place the subassembled problem 4001 */ 4002 multilevel_allowed = PETSC_FALSE; 4003 multilevel_requested = PETSC_FALSE; 4004 if (pcbddc->current_level < pcbddc->max_levels) { 4005 multilevel_requested = PETSC_TRUE; 4006 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4007 multilevel_allowed = PETSC_FALSE; 4008 } else { 4009 multilevel_allowed = PETSC_TRUE; 4010 } 4011 } 4012 /* determine number of process partecipating to coarse solver */ 4013 if (multilevel_allowed) { 4014 ncoarse = ncoarse_ml; 4015 csin = csin_ml; 4016 } else { 4017 ncoarse = ncoarse_ds; 4018 csin = csin_ds; 4019 } 4020 4021 /* creates temporary l2gmap and IS for coarse indexes */ 4022 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4023 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4024 4025 /* creates temporary MATIS object for coarse matrix */ 4026 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4027 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4028 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4029 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4030 #if 0 4031 { 4032 PetscViewer viewer; 4033 char filename[256]; 4034 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4035 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4036 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4037 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4038 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4039 } 4040 #endif 4041 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4042 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4043 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4044 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4045 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4046 4047 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4048 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4049 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4050 const PetscInt *idxs; 4051 ISLocalToGlobalMapping tmap; 4052 4053 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4054 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4055 /* allocate space for temporary storage */ 4056 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4057 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4058 /* allocate for IS array */ 4059 nisdofs = pcbddc->n_ISForDofsLocal; 4060 nisneu = !!pcbddc->NeumannBoundariesLocal; 4061 nis = nisdofs + nisneu; 4062 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4063 /* dofs splitting */ 4064 for (i=0;i<nisdofs;i++) { 4065 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4066 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4067 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4068 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4069 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4070 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4071 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4072 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4073 } 4074 /* neumann boundaries */ 4075 if (pcbddc->NeumannBoundariesLocal) { 4076 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4077 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4078 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4079 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4080 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4081 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4082 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4083 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4084 } 4085 /* free memory */ 4086 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4087 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4088 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4089 } else { 4090 nis = 0; 4091 nisdofs = 0; 4092 nisneu = 0; 4093 isarray = NULL; 4094 } 4095 /* destroy no longer needed map */ 4096 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4097 4098 /* restrict on coarse candidates (if needed) */ 4099 coarse_mat_is = NULL; 4100 if (csin) { 4101 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4102 if (redist) { 4103 PetscMPIInt rank; 4104 PetscInt spc,n_spc_p1,dest[1]; 4105 4106 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4107 spc = all_procs/pcbddc->redistribute_coarse; 4108 n_spc_p1 = all_procs%pcbddc->redistribute_coarse; 4109 if (rank > n_spc_p1*(spc+1)-1) { 4110 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4111 } else { 4112 dest[0] = rank/(spc+1); 4113 } 4114 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4115 } else { 4116 PetscInt j,tissize,*nisindices; 4117 PetscInt *coarse_candidates; 4118 const PetscInt* tisindices; 4119 /* get coarse candidates' ranks in pc communicator */ 4120 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4121 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4122 for (i=0,j=0;i<all_procs;i++) { 4123 if (!coarse_candidates[i]) { 4124 coarse_candidates[j]=i; 4125 j++; 4126 } 4127 } 4128 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 4129 /* get a suitable subassembling pattern */ 4130 if (csin_type_simple) { 4131 PetscMPIInt rank; 4132 PetscInt issize,isidx; 4133 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4134 if (im_active) { 4135 issize = 1; 4136 isidx = (PetscInt)rank; 4137 } else { 4138 issize = 0; 4139 isidx = -1; 4140 } 4141 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4142 } else { 4143 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4144 } 4145 if (pcbddc->dbg_flag) { 4146 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4147 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4148 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4149 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4150 for (i=0;i<j;i++) { 4151 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4152 } 4153 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4154 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4155 } 4156 /* shift the pattern on coarse candidates */ 4157 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4158 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4159 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4160 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4161 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4162 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4163 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4164 } 4165 } 4166 if (pcbddc->dbg_flag) { 4167 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4168 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4169 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4170 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4171 } 4172 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4173 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4174 } else { 4175 if (pcbddc->dbg_flag) { 4176 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4177 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4178 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4179 } 4180 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4181 coarse_mat_is = t_coarse_mat_is; 4182 } 4183 4184 /* create local to global scatters for coarse problem */ 4185 if (compute_vecs) { 4186 PetscInt lrows; 4187 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4188 if (coarse_mat_is) { 4189 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4190 } else { 4191 lrows = 0; 4192 } 4193 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4194 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4195 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4196 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4197 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4198 } 4199 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4200 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4201 4202 /* set defaults for coarse KSP and PC */ 4203 if (multilevel_allowed) { 4204 coarse_ksp_type = KSPRICHARDSON; 4205 coarse_pc_type = PCBDDC; 4206 } else { 4207 coarse_ksp_type = KSPPREONLY; 4208 coarse_pc_type = PCREDUNDANT; 4209 } 4210 4211 /* print some info if requested */ 4212 if (pcbddc->dbg_flag) { 4213 if (!multilevel_allowed) { 4214 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4215 if (multilevel_requested) { 4216 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4217 } else if (pcbddc->max_levels) { 4218 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4219 } 4220 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4221 } 4222 } 4223 4224 /* create the coarse KSP object only once with defaults */ 4225 if (coarse_mat_is) { 4226 MatReuse coarse_mat_reuse; 4227 PetscViewer dbg_viewer = NULL; 4228 if (pcbddc->dbg_flag) { 4229 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4230 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4231 } 4232 if (!pcbddc->coarse_ksp) { 4233 char prefix[256],str_level[16]; 4234 size_t len; 4235 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4236 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4237 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4238 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4239 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4240 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4241 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4242 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4243 /* prefix */ 4244 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4245 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4246 if (!pcbddc->current_level) { 4247 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4248 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4249 } else { 4250 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4251 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4252 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4253 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4254 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4255 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4256 } 4257 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4258 /* allow user customization */ 4259 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4260 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4261 } 4262 4263 /* get some info after set from options */ 4264 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4265 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4266 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4267 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4268 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4269 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4270 isbddc = PETSC_FALSE; 4271 } 4272 if (isredundant) { 4273 KSP inner_ksp; 4274 PC inner_pc; 4275 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4276 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4277 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4278 } 4279 4280 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4281 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4282 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4283 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4284 if (nisdofs) { 4285 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4286 for (i=0;i<nisdofs;i++) { 4287 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4288 } 4289 } 4290 if (nisneu) { 4291 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4292 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4293 } 4294 4295 /* assemble coarse matrix */ 4296 if (coarse_reuse) { 4297 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4298 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4299 coarse_mat_reuse = MAT_REUSE_MATRIX; 4300 } else { 4301 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4302 } 4303 if (isbddc || isnn) { 4304 if (pcbddc->coarsening_ratio > 1) { 4305 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4306 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4307 if (pcbddc->dbg_flag) { 4308 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4309 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4310 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4311 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4312 } 4313 } 4314 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4315 } else { 4316 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4317 coarse_mat = coarse_mat_is; 4318 } 4319 } else { 4320 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4321 } 4322 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4323 4324 /* propagate symmetry info to coarse matrix */ 4325 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4326 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4327 4328 /* set operators */ 4329 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4330 if (pcbddc->dbg_flag) { 4331 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4332 } 4333 } else { /* processes non partecipating to coarse solver (if any) */ 4334 coarse_mat = 0; 4335 } 4336 ierr = PetscFree(isarray);CHKERRQ(ierr); 4337 #if 0 4338 { 4339 PetscViewer viewer; 4340 char filename[256]; 4341 sprintf(filename,"coarse_mat.m"); 4342 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4343 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4344 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4345 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4346 } 4347 #endif 4348 4349 /* Compute coarse null space (special handling by BDDC only) */ 4350 if (pcbddc->NullSpace) { 4351 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4352 } 4353 4354 if (pcbddc->coarse_ksp) { 4355 Vec crhs,csol; 4356 PetscBool ispreonly; 4357 if (CoarseNullSpace) { 4358 if (isbddc) { 4359 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4360 } else { 4361 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4362 } 4363 } 4364 /* setup coarse ksp */ 4365 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4366 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4367 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4368 /* hack */ 4369 if (!csol) { 4370 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4371 } 4372 if (!crhs) { 4373 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4374 } 4375 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4376 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4377 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4378 KSP check_ksp; 4379 KSPType check_ksp_type; 4380 PC check_pc; 4381 Vec check_vec,coarse_vec; 4382 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4383 PetscInt its; 4384 PetscBool compute_eigs; 4385 PetscReal *eigs_r,*eigs_c; 4386 PetscInt neigs; 4387 const char *prefix; 4388 4389 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4390 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4391 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4392 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4393 if (ispreonly) { 4394 check_ksp_type = KSPPREONLY; 4395 compute_eigs = PETSC_FALSE; 4396 } else { 4397 check_ksp_type = KSPGMRES; 4398 compute_eigs = PETSC_TRUE; 4399 } 4400 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4401 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4402 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4403 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4404 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4405 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4406 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4407 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4408 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4409 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4410 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4411 /* create random vec */ 4412 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4413 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4414 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4415 if (CoarseNullSpace) { 4416 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4417 } 4418 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4419 /* solve coarse problem */ 4420 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4421 if (CoarseNullSpace) { 4422 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4423 } 4424 /* set eigenvalue estimation if preonly has not been requested */ 4425 if (compute_eigs) { 4426 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4427 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4428 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4429 lambda_max = eigs_r[neigs-1]; 4430 lambda_min = eigs_r[0]; 4431 if (pcbddc->use_coarse_estimates) { 4432 if (lambda_max>lambda_min) { 4433 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4434 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4435 } 4436 } 4437 } 4438 4439 /* check coarse problem residual error */ 4440 if (pcbddc->dbg_flag) { 4441 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4442 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4443 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4444 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4445 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4446 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4447 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4448 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4449 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4450 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4451 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4452 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4453 if (compute_eigs) { 4454 PetscReal lambda_max_s,lambda_min_s; 4455 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4456 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4457 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4458 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4459 for (i=0;i<neigs;i++) { 4460 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4461 } 4462 } 4463 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4464 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4465 } 4466 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4467 if (compute_eigs) { 4468 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4469 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4470 } 4471 } 4472 } 4473 /* print additional info */ 4474 if (pcbddc->dbg_flag) { 4475 /* waits until all processes reaches this point */ 4476 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4477 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4478 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4479 } 4480 4481 /* free memory */ 4482 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4483 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4484 PetscFunctionReturn(0); 4485 } 4486 4487 #undef __FUNCT__ 4488 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4489 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4490 { 4491 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4492 PC_IS* pcis = (PC_IS*)pc->data; 4493 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4494 PetscInt i,coarse_size; 4495 PetscInt *local_primal_indices; 4496 PetscErrorCode ierr; 4497 4498 PetscFunctionBegin; 4499 /* Compute global number of coarse dofs */ 4500 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4501 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4502 } 4503 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4504 4505 /* check numbering */ 4506 if (pcbddc->dbg_flag) { 4507 PetscScalar coarsesum,*array; 4508 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4509 4510 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4511 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4512 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4513 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4514 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4515 for (i=0;i<pcbddc->local_primal_size;i++) { 4516 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4517 } 4518 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4519 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4520 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4521 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4522 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4523 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4524 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4525 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4526 for (i=0;i<pcis->n;i++) { 4527 if (array[i] == 1.0) { 4528 set_error = PETSC_TRUE; 4529 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4530 } 4531 } 4532 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4533 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4534 for (i=0;i<pcis->n;i++) { 4535 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4536 } 4537 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4538 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4539 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4540 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4541 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4542 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4543 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4544 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4545 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4546 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4547 for (i=0;i<pcbddc->local_primal_size;i++) { 4548 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4549 } 4550 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4551 } 4552 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4553 if (set_error_reduced) { 4554 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4555 } 4556 } 4557 /* get back data */ 4558 *coarse_size_n = coarse_size; 4559 *local_primal_indices_n = local_primal_indices; 4560 PetscFunctionReturn(0); 4561 } 4562 4563 #undef __FUNCT__ 4564 #define __FUNCT__ "PCBDDCGlobalToLocal" 4565 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4566 { 4567 IS localis_t; 4568 PetscInt i,lsize,*idxs,n; 4569 PetscScalar *vals; 4570 PetscErrorCode ierr; 4571 4572 PetscFunctionBegin; 4573 /* get indices in local ordering exploiting local to global map */ 4574 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4575 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4576 for (i=0;i<lsize;i++) vals[i] = 1.0; 4577 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4578 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4579 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4580 if (idxs) { /* multilevel guard */ 4581 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4582 } 4583 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4584 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4585 ierr = PetscFree(vals);CHKERRQ(ierr); 4586 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4587 /* now compute set in local ordering */ 4588 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4589 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4590 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4591 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4592 for (i=0,lsize=0;i<n;i++) { 4593 if (PetscRealPart(vals[i]) > 0.5) { 4594 lsize++; 4595 } 4596 } 4597 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4598 for (i=0,lsize=0;i<n;i++) { 4599 if (PetscRealPart(vals[i]) > 0.5) { 4600 idxs[lsize++] = i; 4601 } 4602 } 4603 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4604 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4605 *localis = localis_t; 4606 PetscFunctionReturn(0); 4607 } 4608 4609 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4610 #undef __FUNCT__ 4611 #define __FUNCT__ "PCBDDCMatMult_Private" 4612 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4613 { 4614 PCBDDCChange_ctx change_ctx; 4615 PetscErrorCode ierr; 4616 4617 PetscFunctionBegin; 4618 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4619 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4620 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4621 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4622 PetscFunctionReturn(0); 4623 } 4624 4625 #undef __FUNCT__ 4626 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4627 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4628 { 4629 PCBDDCChange_ctx change_ctx; 4630 PetscErrorCode ierr; 4631 4632 PetscFunctionBegin; 4633 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4634 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4635 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4636 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4637 PetscFunctionReturn(0); 4638 } 4639 4640 #undef __FUNCT__ 4641 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4642 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4643 { 4644 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4645 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4646 PetscInt *used_xadj,*used_adjncy; 4647 PetscBool free_used_adj; 4648 PetscErrorCode ierr; 4649 4650 PetscFunctionBegin; 4651 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4652 free_used_adj = PETSC_FALSE; 4653 if (pcbddc->sub_schurs_layers == -1) { 4654 used_xadj = NULL; 4655 used_adjncy = NULL; 4656 } else { 4657 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4658 used_xadj = pcbddc->mat_graph->xadj; 4659 used_adjncy = pcbddc->mat_graph->adjncy; 4660 } else if (pcbddc->computed_rowadj) { 4661 used_xadj = pcbddc->mat_graph->xadj; 4662 used_adjncy = pcbddc->mat_graph->adjncy; 4663 } else { 4664 Mat mat_adj; 4665 PetscBool flg_row=PETSC_TRUE; 4666 const PetscInt *xadj,*adjncy; 4667 PetscInt nvtxs; 4668 4669 ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 4670 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4671 if (!flg_row) { 4672 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 4673 } 4674 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4675 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4676 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4677 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4678 if (!flg_row) { 4679 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 4680 } 4681 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 4682 free_used_adj = PETSC_TRUE; 4683 } 4684 } 4685 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4686 4687 /* free adjacency */ 4688 if (free_used_adj) { 4689 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4690 } 4691 PetscFunctionReturn(0); 4692 } 4693 4694 #undef __FUNCT__ 4695 #define __FUNCT__ "PCBDDCInitSubSchurs" 4696 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4697 { 4698 PC_IS *pcis=(PC_IS*)pc->data; 4699 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4700 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4701 PCBDDCGraph graph; 4702 Mat S_j; 4703 PetscErrorCode ierr; 4704 4705 PetscFunctionBegin; 4706 /* attach interface graph for determining subsets */ 4707 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4708 IS verticesIS; 4709 4710 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4711 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4712 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4713 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4714 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4715 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4716 /* 4717 if (pcbddc->dbg_flag) { 4718 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4719 } 4720 */ 4721 } else { 4722 graph = pcbddc->mat_graph; 4723 } 4724 4725 /* Create Schur complement matrix */ 4726 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4727 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4728 4729 /* sub_schurs init */ 4730 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr); 4731 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4732 /* free graph struct */ 4733 if (pcbddc->sub_schurs_rebuild) { 4734 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4735 } 4736 PetscFunctionReturn(0); 4737 } 4738