1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscBool symmetric; 26 PetscErrorCode ierr; 27 28 PetscFunctionBegin; 29 if (!sub_schurs->use_mumps) { 30 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 31 } 32 ierr = MatIsSymmetric(sub_schurs->A,0.0,&symmetric);CHKERRQ(ierr); 33 if (!symmetric) { 34 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented"); 35 } 36 37 /* max size of subsets */ 38 mss = 0; 39 for (i=0;i<sub_schurs->n_subs;i++) { 40 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 41 PetscInt subset_size; 42 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 43 mss = PetscMax(mss,subset_size); 44 } 45 } 46 47 /* min/max and threshold */ 48 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 49 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 1; 50 nmax = PetscMax(nmin,nmax); 51 if (pcbddc->adaptive_threshold > 1.0) { 52 thresh = 1.0/pcbddc->adaptive_threshold; 53 } else { 54 thresh = 1.0; 55 } 56 57 /* allocate lapack workspace */ 58 cum = cum2 = 0; 59 maxneigs = 0; 60 for (i=0;i<sub_schurs->n_subs;i++) { 61 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 62 PetscInt n,subset_size; 63 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 64 n = PetscMin(subset_size,nmax); 65 n = PetscMax(n,nmin); 66 cum += subset_size*n; 67 cum2 += n; 68 maxneigs = PetscMax(maxneigs,n); 69 } 70 } 71 72 if (mss) { 73 if (symmetric) { 74 PetscBLASInt B_itype = 1; 75 PetscBLASInt B_N = mss; 76 PetscScalar zero = 0.0; 77 PetscScalar eps = 0.0; /* dlamch? */ 78 79 B_lwork = -1; 80 S = NULL; 81 St = NULL; 82 eigs = NULL; 83 eigv = NULL; 84 B_iwork = NULL; 85 B_ifail = NULL; 86 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 87 #if defined(PETSC_USE_COMPLEX) 88 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 89 #else 90 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 91 #endif 92 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 93 ierr = PetscFPTrapPop();CHKERRQ(ierr); 94 } else { 95 /* TODO */ 96 } 97 } else { 98 lwork = 0; 99 } 100 101 nv = 0; 102 if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */ 103 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 104 } 105 ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr); 106 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 107 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 108 #if defined(PETSC_USE_COMPLEX) 109 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 110 #endif 111 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 112 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 113 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 114 nv+cum,&pcbddc->adaptive_constraints_idxs, 115 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 116 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 117 118 maxneigs = 0; 119 cum = cum2 = cumarray = 0; 120 if (sub_schurs->is_Ej_com) { 121 const PetscInt *idxs; 122 123 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 124 for (cum=0;cum<nv;cum++) { 125 pcbddc->adaptive_constraints_n[cum] = 1; 126 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 127 pcbddc->adaptive_constraints_ptrs[cum] = cum; 128 pcbddc->adaptive_constraints_data[cum] = 1.0; 129 } 130 cum2 = cum; 131 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 132 } 133 134 if (mss) { /* multilevel */ 135 if (pcbddc->use_deluxe_scaling) { 136 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 137 } else { 138 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 139 } 140 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 141 } 142 143 for (i=0;i<sub_schurs->n_subs;i++) { 144 PetscInt j,subset_size; 145 146 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 147 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 148 const PetscInt *idxs; 149 PetscScalar one = 1.0,zero=0.0; 150 PetscBLASInt B_N; 151 152 /* S should be copied since we need it for deluxe scaling */ 153 if (symmetric) { 154 PetscInt j; 155 for (j=0;j<subset_size;j++) { 156 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 157 } 158 for (j=0;j<subset_size;j++) { 159 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 160 } 161 } else { 162 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 163 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 164 } 165 /* is always this the right matrix? */ 166 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 167 168 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 169 /* St = Starray+cumarray; */ 170 171 if (symmetric) { 172 PetscBLASInt B_itype = 1; 173 PetscBLASInt B_IL = 1, B_IU; 174 PetscScalar eps = -1.0; /* dlamch? */ 175 176 /* ask for eigenvalues lower than thresh */ 177 PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs); 178 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 179 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 180 #if defined(PETSC_USE_COMPLEX) 181 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 182 #else 183 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 184 #endif 185 ierr = PetscFPTrapPop();CHKERRQ(ierr); 186 if (B_ierr) { 187 if (B_ierr < 0 ) { 188 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 189 } else if (B_ierr <= B_N) { 190 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 191 } else { 192 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d principal minor not posdef",(int)B_ierr-B_N-1); 193 } 194 } 195 196 if (B_neigs > nmax) { 197 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax); 198 B_neigs = nmax; 199 } 200 201 if (B_neigs < nmin) { 202 PetscBLASInt B_neigs2; 203 204 B_IL = B_neigs + 1; 205 ierr = PetscBLASIntCast(nmin,&B_IU);CHKERRQ(ierr); 206 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU); 207 if (symmetric) { 208 PetscInt j; 209 for (j=0;j<subset_size;j++) { 210 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 211 } 212 for (j=0;j<subset_size;j++) { 213 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 214 } 215 } else { 216 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 217 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 218 } 219 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 220 #if defined(PETSC_USE_COMPLEX) 221 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 222 #else 223 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 224 #endif 225 ierr = PetscFPTrapPop();CHKERRQ(ierr); 226 B_neigs += B_neigs2; 227 } 228 if (B_ierr) { 229 if (B_ierr < 0 ) { 230 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 231 } else if (B_ierr <= B_N) { 232 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 233 } else { 234 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d principal minor not posdef",(int)B_ierr-B_N-1); 235 } 236 } 237 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Got %d eigs\n",PetscGlobalRank,B_neigs); 238 for (j=0;j<B_neigs;j++) { 239 if (eigs[j] == 0.0) { 240 PetscPrintf(PETSC_COMM_SELF,"[%d] Inf\n",PetscGlobalRank); 241 } else { 242 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.6e\n",PetscGlobalRank,1.0/eigs[j]); 243 } 244 } 245 } else { 246 /* TODO */ 247 } 248 maxneigs = PetscMax(B_neigs,maxneigs); 249 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 250 251 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 252 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N)); 253 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 254 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 255 for (j=0;j<B_neigs;j++) { 256 #if 0 257 { 258 PetscBLASInt Blas_N,Blas_one = 1.0; 259 PetscScalar norm; 260 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 261 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 262 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 263 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 264 } else { 265 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 266 } 267 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 268 } 269 #endif 270 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 271 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 272 cum2 += subset_size; 273 } 274 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 275 } 276 /* shift for next computation */ 277 cumarray += subset_size*subset_size; 278 } 279 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 280 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 281 282 if (mss) { 283 if (pcbddc->use_deluxe_scaling) { 284 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 285 } else { 286 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 287 } 288 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 289 } 290 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 291 #if defined(PETSC_USE_COMPLEX) 292 ierr = PetscFree(rwork);CHKERRQ(ierr); 293 #endif 294 if (pcbddc->dbg_flag) { 295 PetscInt maxneigs_r; 296 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 297 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 298 } 299 PetscFunctionReturn(0); 300 } 301 302 #undef __FUNCT__ 303 #define __FUNCT__ "PCBDDCSetUpSolvers" 304 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 305 { 306 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 307 PetscScalar *coarse_submat_vals; 308 PetscErrorCode ierr; 309 310 PetscFunctionBegin; 311 /* Setup local scatters R_to_B and (optionally) R_to_D */ 312 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 313 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 314 315 /* Setup local neumann solver ksp_R */ 316 /* PCBDDCSetUpLocalScatters should be called first! */ 317 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 318 319 /* Change global null space passed in by the user if change of basis has been requested */ 320 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 321 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 322 } 323 324 /* 325 Setup local correction and local part of coarse basis. 326 Gives back the dense local part of the coarse matrix in column major ordering 327 */ 328 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 329 330 /* Compute total number of coarse nodes and setup coarse solver */ 331 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 332 333 /* free */ 334 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 335 PetscFunctionReturn(0); 336 } 337 338 #undef __FUNCT__ 339 #define __FUNCT__ "PCBDDCResetCustomization" 340 PetscErrorCode PCBDDCResetCustomization(PC pc) 341 { 342 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 343 PetscErrorCode ierr; 344 345 PetscFunctionBegin; 346 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 347 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 348 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 349 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 350 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 351 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 352 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 353 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 354 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 355 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 356 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 357 PetscFunctionReturn(0); 358 } 359 360 #undef __FUNCT__ 361 #define __FUNCT__ "PCBDDCResetTopography" 362 PetscErrorCode PCBDDCResetTopography(PC pc) 363 { 364 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 365 PetscErrorCode ierr; 366 367 PetscFunctionBegin; 368 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 369 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 370 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 371 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 372 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 373 PetscFunctionReturn(0); 374 } 375 376 #undef __FUNCT__ 377 #define __FUNCT__ "PCBDDCResetSolvers" 378 PetscErrorCode PCBDDCResetSolvers(PC pc) 379 { 380 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 381 PetscScalar *array; 382 PetscErrorCode ierr; 383 384 PetscFunctionBegin; 385 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 386 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 387 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 388 ierr = PetscFree(array);CHKERRQ(ierr); 389 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 390 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 391 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 392 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 393 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 394 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 395 if (pcbddc->local_auxmat2) { 396 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 397 ierr = PetscFree(array);CHKERRQ(ierr); 398 } 399 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 400 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 401 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 402 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 403 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 404 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 405 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 406 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 407 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 408 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 409 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 410 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 411 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 412 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 413 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 414 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 415 PetscFunctionReturn(0); 416 } 417 418 #undef __FUNCT__ 419 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 420 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 421 { 422 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 423 PC_IS *pcis = (PC_IS*)pc->data; 424 VecType impVecType; 425 PetscInt n_constraints,n_R,old_size; 426 PetscErrorCode ierr; 427 428 PetscFunctionBegin; 429 if (!pcbddc->ConstraintMatrix) { 430 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 431 } 432 /* get sizes */ 433 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 434 n_R = pcis->n-pcbddc->n_actual_vertices; 435 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 436 /* local work vectors (try to avoid unneeded work)*/ 437 /* R nodes */ 438 old_size = -1; 439 if (pcbddc->vec1_R) { 440 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 441 } 442 if (n_R != old_size) { 443 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 444 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 445 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 446 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 447 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 448 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 449 } 450 /* local primal dofs */ 451 old_size = -1; 452 if (pcbddc->vec1_P) { 453 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 454 } 455 if (pcbddc->local_primal_size != old_size) { 456 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 457 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 458 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 459 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 460 } 461 /* local explicit constraints */ 462 old_size = -1; 463 if (pcbddc->vec1_C) { 464 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 465 } 466 if (n_constraints && n_constraints != old_size) { 467 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 468 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 469 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 470 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 471 } 472 PetscFunctionReturn(0); 473 } 474 475 #undef __FUNCT__ 476 #define __FUNCT__ "PCBDDCSetUpCorrection" 477 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 478 { 479 PetscErrorCode ierr; 480 /* pointers to pcis and pcbddc */ 481 PC_IS* pcis = (PC_IS*)pc->data; 482 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 483 /* submatrices of local problem */ 484 Mat A_RV,A_VR,A_VV; 485 /* submatrices of local coarse problem */ 486 Mat S_VV,S_CV,S_VC,S_CC; 487 /* working matrices */ 488 Mat C_CR; 489 /* additional working stuff */ 490 PC pc_R; 491 Mat F; 492 PetscBool isLU,isCHOL,isILU; 493 494 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 495 PetscScalar *work; 496 PetscInt *idx_V_B; 497 PetscInt n,n_vertices,n_constraints; 498 PetscInt i,n_R,n_D,n_B; 499 PetscBool unsymmetric_check; 500 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 501 MatType impMatType; 502 /* some shortcuts to scalars */ 503 PetscScalar one=1.0,m_one=-1.0; 504 505 PetscFunctionBegin; 506 /* get number of vertices (corners plus constraints with change of basis) 507 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 508 n_vertices = pcbddc->n_actual_vertices; 509 n_constraints = pcbddc->local_primal_size-n_vertices; 510 /* Set Non-overlapping dimensions */ 511 n_B = pcis->n_B; n_D = pcis->n - n_B; 512 n_R = pcis->n-n_vertices; 513 514 /* Set types for local objects needed by BDDC precondtioner */ 515 impMatType = MATSEQDENSE; 516 517 /* vertices in boundary numbering */ 518 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 519 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 520 if (i != n_vertices) { 521 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 522 } 523 524 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 525 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 526 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 527 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 528 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 529 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 530 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 531 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 532 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 533 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 534 535 unsymmetric_check = PETSC_FALSE; 536 /* allocate workspace */ 537 n = 0; 538 if (n_constraints) { 539 n += n_R*n_constraints; 540 } 541 if (n_vertices) { 542 n = PetscMax(2*n_R*n_vertices,n); 543 } 544 if (!pcbddc->issym) { 545 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 546 unsymmetric_check = PETSC_TRUE; 547 } 548 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 549 550 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 551 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 552 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 553 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 554 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 555 if (isLU || isILU || isCHOL) { 556 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 557 } else { 558 F = NULL; 559 } 560 561 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 562 if (n_constraints) { 563 Mat M1,M2,M3; 564 IS is_aux; 565 /* see if we can save some allocations */ 566 if (pcbddc->local_auxmat2) { 567 PetscInt on_R,on_constraints; 568 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 569 if (on_R != n_R || on_constraints != n_constraints) { 570 PetscScalar *marray; 571 572 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 573 ierr = PetscFree(marray);CHKERRQ(ierr); 574 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 575 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 576 } 577 } 578 /* auxiliary matrices */ 579 if (!pcbddc->local_auxmat2) { 580 PetscScalar *marray; 581 582 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 583 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 584 marray += n_R*n_constraints; 585 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 586 } 587 588 /* Extract constraints on R nodes: C_{CR} */ 589 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 590 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 591 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 592 593 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 594 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 595 for (i=0;i<n_constraints;i++) { 596 const PetscScalar *row_cmat_values; 597 const PetscInt *row_cmat_indices; 598 PetscInt size_of_constraint,j; 599 600 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 601 for (j=0;j<size_of_constraint;j++) { 602 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 603 } 604 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 605 } 606 if (F) { 607 Mat B; 608 609 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 610 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 611 ierr = MatDestroy(&B);CHKERRQ(ierr); 612 } else { 613 PetscScalar *xarray; 614 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 615 for (i=0;i<n_constraints;i++) { 616 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 617 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 618 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 619 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 620 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 621 } 622 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 623 } 624 625 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 626 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 627 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 628 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 629 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 630 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 631 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 632 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 633 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 634 ierr = MatDestroy(&M2);CHKERRQ(ierr); 635 ierr = MatDestroy(&M3);CHKERRQ(ierr); 636 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 637 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 638 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 639 ierr = MatDestroy(&M1);CHKERRQ(ierr); 640 } 641 642 /* Get submatrices from subdomain matrix */ 643 if (n_vertices) { 644 Mat newmat; 645 IS is_aux; 646 PetscInt ibs,mbs; 647 PetscBool issbaij; 648 649 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 650 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 651 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 652 if (ibs != mbs) { /* need to convert to SEQAIJ */ 653 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 654 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 655 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 656 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 657 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 658 } else { 659 /* this is safe */ 660 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 661 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 662 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 663 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 664 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 665 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 666 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 667 } else { 668 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 669 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 670 } 671 } 672 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 673 } 674 675 /* Matrix of coarse basis functions (local) */ 676 if (pcbddc->coarse_phi_B) { 677 PetscInt on_B,on_primal,on_D=n_D; 678 if (pcbddc->coarse_phi_D) { 679 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 680 } 681 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 682 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 683 PetscScalar *marray; 684 685 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 686 ierr = PetscFree(marray);CHKERRQ(ierr); 687 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 688 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 689 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 690 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 691 } 692 } 693 694 if (!pcbddc->coarse_phi_B) { 695 PetscScalar *marray; 696 697 n = n_B*pcbddc->local_primal_size; 698 if (pcbddc->switch_static || pcbddc->dbg_flag) { 699 n += n_D*pcbddc->local_primal_size; 700 } 701 if (!pcbddc->issym) { 702 n *= 2; 703 } 704 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 705 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 706 n = n_B*pcbddc->local_primal_size; 707 if (pcbddc->switch_static || pcbddc->dbg_flag) { 708 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 709 n += n_D*pcbddc->local_primal_size; 710 } 711 if (!pcbddc->issym) { 712 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 713 if (pcbddc->switch_static || pcbddc->dbg_flag) { 714 n = n_B*pcbddc->local_primal_size; 715 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 716 } 717 } else { 718 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 719 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 720 if (pcbddc->switch_static || pcbddc->dbg_flag) { 721 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 722 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 723 } 724 } 725 } 726 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 727 /* vertices */ 728 if (n_vertices) { 729 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 730 PetscScalar *x,*y; 731 PetscBLASInt B_N,B_one = 1; 732 733 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 734 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 735 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 736 if (F) { 737 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 738 } else { 739 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 740 for (i=0;i<n_vertices;i++) { 741 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 742 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 743 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 744 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 745 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 746 } 747 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 748 } 749 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 750 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 751 if (n_constraints) { 752 Mat B; 753 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 754 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 755 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 756 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 757 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 758 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 759 ierr = MatDestroy(&B);CHKERRQ(ierr); 760 } 761 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 762 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 763 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 764 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 765 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 766 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 767 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 768 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 769 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 770 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 771 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 772 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 773 /* coarse basis functions */ 774 for (i=0;i<n_vertices;i++) { 775 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 776 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 777 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 778 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 779 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 780 y[n_B*i+idx_V_B[i]] = 1.0; 781 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 782 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 783 784 if (pcbddc->switch_static || pcbddc->dbg_flag) { 785 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 786 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 787 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 788 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 789 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 790 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 791 } 792 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 793 } 794 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 795 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 796 } 797 798 if (n_constraints) { 799 Mat B; 800 801 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 802 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 803 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 804 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 805 if (n_vertices) { 806 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 807 } 808 ierr = MatDestroy(&B);CHKERRQ(ierr); 809 /* coarse basis functions */ 810 for (i=0;i<n_constraints;i++) { 811 PetscScalar *y; 812 813 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 814 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 815 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 816 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 817 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 818 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 819 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 820 821 if (pcbddc->switch_static || pcbddc->dbg_flag) { 822 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 823 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 824 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 826 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 827 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 828 } 829 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 830 } 831 } 832 833 /* compute other basis functions for non-symmetric problems */ 834 if (!pcbddc->issym) { 835 Mat B,X; 836 837 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 838 839 if (n_constraints) { 840 Mat S_VCT,S_CCT,B_C; 841 842 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 843 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 844 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 845 if (n_vertices) { 846 Mat B_V; 847 848 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 849 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 850 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 851 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 852 } 853 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 854 } 855 if (n_vertices) { 856 Mat A_VRT; 857 PetscBLASInt B_N,B_one = 1; 858 859 if (!n_constraints) { /* if there are no constraints, reset work */ 860 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 861 } 862 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 863 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 864 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 865 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 866 } 867 868 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 869 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 870 for (i=0;i<pcbddc->local_primal_size;i++) { 871 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 872 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 873 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 874 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 875 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 876 } 877 } else { 878 for (i=0;i<pcbddc->local_primal_size;i++) { 879 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 880 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 881 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 882 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 883 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 884 } 885 } 886 ierr = MatDestroy(&B);CHKERRQ(ierr); 887 /* coarse basis functions */ 888 for (i=0;i<pcbddc->local_primal_size;i++) { 889 PetscScalar *y; 890 891 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 892 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 893 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 894 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 895 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 if (i<n_vertices) { 897 y[n_B*i+idx_V_B[i]] = 1.0; 898 } 899 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 900 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 901 902 if (pcbddc->switch_static || pcbddc->dbg_flag) { 903 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 904 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 905 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 906 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 907 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 908 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 909 } 910 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 911 } 912 ierr = MatDestroy(&X);CHKERRQ(ierr); 913 } 914 915 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 916 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 917 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 918 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 919 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 920 /* Checking coarse_sub_mat and coarse basis functios */ 921 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 922 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 923 if (pcbddc->dbg_flag) { 924 Mat coarse_sub_mat; 925 Mat AUXMAT,TM1,TM2,TM3,TM4; 926 Mat coarse_phi_D,coarse_phi_B; 927 Mat coarse_psi_D,coarse_psi_B; 928 Mat A_II,A_BB,A_IB,A_BI; 929 MatType checkmattype=MATSEQAIJ; 930 PetscReal real_value; 931 932 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 933 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 934 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 935 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 936 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 937 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 938 if (unsymmetric_check) { 939 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 940 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 941 } 942 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 943 944 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 945 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 946 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 947 if (unsymmetric_check) { 948 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 949 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 950 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 951 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 952 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 953 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 954 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 955 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 956 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 957 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 958 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 959 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 960 } else { 961 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 962 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 963 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 964 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 965 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 966 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 967 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 968 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 969 } 970 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 971 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 972 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 973 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 974 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 975 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 976 977 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 978 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 979 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 980 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 981 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 982 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 983 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 984 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 985 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 986 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 987 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 988 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 989 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 990 if (unsymmetric_check) { 991 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 992 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 993 } 994 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 995 } 996 997 /* free memory */ 998 ierr = PetscFree(work);CHKERRQ(ierr); 999 if (n_vertices) { 1000 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1001 } 1002 if (n_constraints) { 1003 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1004 } 1005 /* get back data */ 1006 *coarse_submat_vals_n = coarse_submat_vals; 1007 PetscFunctionReturn(0); 1008 } 1009 1010 #undef __FUNCT__ 1011 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1012 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B) 1013 { 1014 Mat *work_mat; 1015 IS isrow_s,iscol_s; 1016 PetscBool rsorted,csorted; 1017 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1018 PetscErrorCode ierr; 1019 1020 PetscFunctionBegin; 1021 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1022 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1023 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1024 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1025 1026 if (!rsorted) { 1027 const PetscInt *idxs; 1028 PetscInt *idxs_sorted,i; 1029 1030 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1031 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1032 for (i=0;i<rsize;i++) { 1033 idxs_perm_r[i] = i; 1034 } 1035 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1036 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1037 for (i=0;i<rsize;i++) { 1038 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1039 } 1040 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1041 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1042 } else { 1043 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1044 isrow_s = isrow; 1045 } 1046 1047 if (!csorted) { 1048 if (isrow == iscol) { 1049 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1050 iscol_s = isrow_s; 1051 } else { 1052 const PetscInt *idxs; 1053 PetscInt *idxs_sorted,i; 1054 1055 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1056 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1057 for (i=0;i<csize;i++) { 1058 idxs_perm_c[i] = i; 1059 } 1060 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1061 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1062 for (i=0;i<csize;i++) { 1063 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1064 } 1065 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1066 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1067 } 1068 } else { 1069 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1070 iscol_s = iscol; 1071 } 1072 1073 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr); 1074 1075 if (!rsorted || !csorted) { 1076 Mat new_mat; 1077 IS is_perm_r,is_perm_c; 1078 1079 if (!rsorted) { 1080 PetscInt *idxs_r,i; 1081 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1082 for (i=0;i<rsize;i++) { 1083 idxs_r[idxs_perm_r[i]] = i; 1084 } 1085 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1086 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1087 } else { 1088 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1089 } 1090 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1091 1092 if (!csorted) { 1093 if (isrow_s == iscol_s) { 1094 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1095 is_perm_c = is_perm_r; 1096 } else { 1097 PetscInt *idxs_c,i; 1098 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1099 for (i=0;i<csize;i++) { 1100 idxs_c[idxs_perm_c[i]] = i; 1101 } 1102 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1103 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1104 } 1105 } else { 1106 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1107 } 1108 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1109 1110 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1111 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1112 work_mat[0] = new_mat; 1113 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1114 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1115 } 1116 1117 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1118 *B = work_mat[0]; 1119 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1120 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1121 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1122 PetscFunctionReturn(0); 1123 } 1124 1125 #undef __FUNCT__ 1126 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1127 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1128 { 1129 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1130 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1131 Mat new_mat; 1132 IS is_local,is_global; 1133 PetscInt local_size; 1134 PetscBool isseqaij; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1139 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1140 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1141 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1142 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1143 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr); 1144 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1145 1146 /* check */ 1147 if (pcbddc->dbg_flag) { 1148 Vec x,x_change; 1149 PetscReal error; 1150 1151 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1152 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1153 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1154 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1155 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1156 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1157 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1158 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1159 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1160 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1161 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1162 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1163 ierr = VecDestroy(&x);CHKERRQ(ierr); 1164 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1165 } 1166 1167 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1168 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1169 if (isseqaij) { 1170 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1171 } else { 1172 Mat work_mat; 1173 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1174 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1175 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1176 } 1177 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1178 /* 1179 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1180 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1181 */ 1182 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1183 PetscFunctionReturn(0); 1184 } 1185 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1188 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1189 { 1190 PC_IS* pcis = (PC_IS*)(pc->data); 1191 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1192 IS is_aux1,is_aux2; 1193 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1194 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1195 PetscInt vbs,bs; 1196 PetscBT bitmask; 1197 PetscErrorCode ierr; 1198 1199 PetscFunctionBegin; 1200 /* 1201 No need to setup local scatters if 1202 - primal space is unchanged 1203 AND 1204 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1205 AND 1206 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1207 */ 1208 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1209 PetscFunctionReturn(0); 1210 } 1211 /* destroy old objects */ 1212 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1213 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1214 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1215 /* Set Non-overlapping dimensions */ 1216 n_B = pcis->n_B; n_D = pcis->n - n_B; 1217 n_vertices = pcbddc->n_actual_vertices; 1218 /* create auxiliary bitmask */ 1219 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1220 for (i=0;i<n_vertices;i++) { 1221 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1222 } 1223 1224 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1225 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1226 for (i=0, n_R=0; i<pcis->n; i++) { 1227 if (!PetscBTLookup(bitmask,i)) { 1228 idx_R_local[n_R] = i; 1229 n_R++; 1230 } 1231 } 1232 1233 /* Block code */ 1234 vbs = 1; 1235 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1236 if (bs>1 && !(n_vertices%bs)) { 1237 PetscBool is_blocked = PETSC_TRUE; 1238 PetscInt *vary; 1239 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1240 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1241 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1242 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1243 for (i=0; i<n_vertices; i++) { 1244 if (vary[i]!=0 && vary[i]!=bs) { 1245 is_blocked = PETSC_FALSE; 1246 break; 1247 } 1248 } 1249 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1250 vbs = bs; 1251 for (i=0;i<n_R/vbs;i++) { 1252 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1253 } 1254 } 1255 ierr = PetscFree(vary);CHKERRQ(ierr); 1256 } 1257 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1258 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1259 1260 /* print some info if requested */ 1261 if (pcbddc->dbg_flag) { 1262 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1263 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1264 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1266 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1267 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1268 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1269 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1270 } 1271 1272 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1273 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1274 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1275 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1276 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1277 for (i=0; i<n_D; i++) { 1278 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1279 } 1280 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1281 for (i=0, j=0; i<n_R; i++) { 1282 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1283 aux_array1[j++] = i; 1284 } 1285 } 1286 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1287 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1288 for (i=0, j=0; i<n_B; i++) { 1289 if (!PetscBTLookup(bitmask,is_indices[i])) { 1290 aux_array2[j++] = i; 1291 } 1292 } 1293 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1294 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1295 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1296 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1297 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1298 1299 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1300 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1301 for (i=0, j=0; i<n_R; i++) { 1302 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1303 aux_array1[j++] = i; 1304 } 1305 } 1306 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1307 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1308 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1309 } 1310 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1311 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1312 PetscFunctionReturn(0); 1313 } 1314 1315 1316 #undef __FUNCT__ 1317 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1318 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1319 { 1320 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1321 PC_IS *pcis = (PC_IS*)pc->data; 1322 PC pc_temp; 1323 Mat A_RR; 1324 MatReuse reuse; 1325 PetscScalar m_one = -1.0; 1326 PetscReal value; 1327 PetscInt n_D,n_R,ibs,mbs; 1328 PetscBool use_exact,use_exact_reduced,issbaij; 1329 PetscErrorCode ierr; 1330 /* prefixes stuff */ 1331 char dir_prefix[256],neu_prefix[256],str_level[16]; 1332 size_t len; 1333 1334 PetscFunctionBegin; 1335 1336 /* compute prefixes */ 1337 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1338 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1339 if (!pcbddc->current_level) { 1340 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1341 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1342 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1343 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1344 } else { 1345 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1346 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1347 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1348 len -= 15; /* remove "pc_bddc_coarse_" */ 1349 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1350 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1351 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1352 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1353 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1354 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1355 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1356 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1357 } 1358 1359 /* DIRICHLET PROBLEM */ 1360 if (dirichlet) { 1361 /* Matrix for Dirichlet problem is pcis->A_II */ 1362 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1363 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1364 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1365 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1366 /* default */ 1367 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1368 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1369 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1370 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1371 if (issbaij) { 1372 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1373 } else { 1374 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1375 } 1376 /* Allow user's customization */ 1377 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1378 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1379 } 1380 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1381 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1382 if (!n_D) { 1383 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1384 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1385 } 1386 /* Set Up KSP for Dirichlet problem of BDDC */ 1387 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1388 /* set ksp_D into pcis data */ 1389 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1390 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1391 pcis->ksp_D = pcbddc->ksp_D; 1392 } 1393 1394 /* NEUMANN PROBLEM */ 1395 A_RR = 0; 1396 if (neumann) { 1397 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1398 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1399 if (pcbddc->ksp_R) { /* already created ksp */ 1400 PetscInt nn_R; 1401 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1402 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1403 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1404 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1405 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1406 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1407 reuse = MAT_INITIAL_MATRIX; 1408 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1409 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1410 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1411 reuse = MAT_INITIAL_MATRIX; 1412 } else { /* safe to reuse the matrix */ 1413 reuse = MAT_REUSE_MATRIX; 1414 } 1415 } 1416 /* last check */ 1417 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1418 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1419 reuse = MAT_INITIAL_MATRIX; 1420 } 1421 } else { /* first time, so we need to create the matrix */ 1422 reuse = MAT_INITIAL_MATRIX; 1423 } 1424 /* extract A_RR */ 1425 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1426 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1427 if (ibs != mbs) { 1428 Mat newmat; 1429 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1430 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1431 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1432 } else { 1433 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1434 } 1435 if (!pcbddc->ksp_R) { /* create object if not present */ 1436 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1437 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1438 /* default */ 1439 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1440 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1441 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1442 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1443 if (issbaij) { 1444 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1445 } else { 1446 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1447 } 1448 /* Allow user's customization */ 1449 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1450 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1451 } 1452 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1453 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1454 if (!n_R) { 1455 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1456 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1457 } 1458 /* Set Up KSP for Neumann problem of BDDC */ 1459 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1460 } 1461 1462 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1463 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1464 if (pcbddc->dbg_flag) { 1465 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1467 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1468 } 1469 if (dirichlet) { /* Dirichlet */ 1470 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1471 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1472 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1473 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1474 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1475 /* need to be adapted? */ 1476 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1477 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1478 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1479 /* print info */ 1480 if (pcbddc->dbg_flag) { 1481 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1482 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1483 } 1484 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1485 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1486 } 1487 } 1488 if (neumann) { /* Neumann */ 1489 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1490 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1491 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1492 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1493 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1494 /* need to be adapted? */ 1495 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1496 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1497 /* print info */ 1498 if (pcbddc->dbg_flag) { 1499 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1500 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1501 } 1502 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1503 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1504 } 1505 } 1506 } 1507 /* free Neumann problem's matrix */ 1508 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1509 PetscFunctionReturn(0); 1510 } 1511 1512 #undef __FUNCT__ 1513 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1514 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1515 { 1516 PetscErrorCode ierr; 1517 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1518 1519 PetscFunctionBegin; 1520 if (applytranspose) { 1521 if (pcbddc->local_auxmat1) { 1522 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1523 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1524 } 1525 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1526 } else { 1527 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1528 if (pcbddc->local_auxmat1) { 1529 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1530 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1531 } 1532 } 1533 PetscFunctionReturn(0); 1534 } 1535 1536 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1537 #undef __FUNCT__ 1538 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1539 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1540 { 1541 PetscErrorCode ierr; 1542 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1543 PC_IS* pcis = (PC_IS*) (pc->data); 1544 const PetscScalar zero = 0.0; 1545 1546 PetscFunctionBegin; 1547 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1548 if (applytranspose) { 1549 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1550 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1551 } else { 1552 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1553 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1554 } 1555 /* start communications from local primal nodes to rhs of coarse solver */ 1556 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1557 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1558 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1559 1560 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1561 /* TODO remove null space when doing multilevel */ 1562 if (pcbddc->coarse_ksp) { 1563 if (applytranspose) { 1564 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1565 } else { 1566 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1567 } 1568 } 1569 1570 /* Local solution on R nodes */ 1571 if (pcis->n) { 1572 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1573 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1574 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1575 if (pcbddc->switch_static) { 1576 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1577 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1578 } 1579 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1580 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1581 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1582 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1583 if (pcbddc->switch_static) { 1584 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1586 } 1587 } 1588 1589 /* communications from coarse sol to local primal nodes */ 1590 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1591 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1592 1593 /* Sum contributions from two levels */ 1594 if (applytranspose) { 1595 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1596 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1597 } else { 1598 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1599 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1600 } 1601 PetscFunctionReturn(0); 1602 } 1603 1604 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1605 #undef __FUNCT__ 1606 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1607 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1608 { 1609 PetscErrorCode ierr; 1610 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1611 PetscScalar *array,*array2; 1612 Vec from,to; 1613 1614 PetscFunctionBegin; 1615 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1616 from = pcbddc->coarse_vec; 1617 to = pcbddc->vec1_P; 1618 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1619 Vec tvec; 1620 PetscInt lsize; 1621 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1622 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1623 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1624 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1625 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1626 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1627 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1628 } 1629 } else { /* from local to global -> put data in coarse right hand side */ 1630 from = pcbddc->vec1_P; 1631 to = pcbddc->coarse_vec; 1632 } 1633 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1634 PetscFunctionReturn(0); 1635 } 1636 1637 #undef __FUNCT__ 1638 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1639 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1640 { 1641 PetscErrorCode ierr; 1642 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1643 PetscScalar *array,*array2; 1644 Vec from,to; 1645 1646 PetscFunctionBegin; 1647 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1648 from = pcbddc->coarse_vec; 1649 to = pcbddc->vec1_P; 1650 } else { /* from local to global -> put data in coarse right hand side */ 1651 from = pcbddc->vec1_P; 1652 to = pcbddc->coarse_vec; 1653 } 1654 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1655 if (smode == SCATTER_FORWARD) { 1656 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1657 Vec tvec; 1658 PetscInt lsize; 1659 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1660 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1661 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1662 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1663 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1664 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1665 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1666 } 1667 } 1668 PetscFunctionReturn(0); 1669 } 1670 1671 /* uncomment for testing purposes */ 1672 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1673 #undef __FUNCT__ 1674 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1675 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1676 { 1677 PetscErrorCode ierr; 1678 PC_IS* pcis = (PC_IS*)(pc->data); 1679 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1680 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1681 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1682 MatType impMatType=MATSEQAIJ; 1683 /* one and zero */ 1684 PetscScalar one=1.0,zero=0.0; 1685 /* space to store constraints and their local indices */ 1686 PetscScalar *temp_quadrature_constraint; 1687 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1688 /* iterators */ 1689 PetscInt i,j,k,total_counts,temp_start_ptr; 1690 /* stuff to store connected components stored in pcbddc->mat_graph */ 1691 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1692 PetscInt n_ISForFaces,n_ISForEdges; 1693 /* near null space stuff */ 1694 MatNullSpace nearnullsp; 1695 const Vec *nearnullvecs; 1696 Vec *localnearnullsp; 1697 PetscBool nnsp_has_cnst; 1698 PetscInt nnsp_size; 1699 PetscScalar *array; 1700 /* BLAS integers */ 1701 PetscBLASInt lwork,lierr; 1702 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1703 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1704 /* LAPACK working arrays for SVD or POD */ 1705 PetscBool skip_lapack; 1706 PetscScalar *work; 1707 PetscReal *singular_vals; 1708 #if defined(PETSC_USE_COMPLEX) 1709 PetscReal *rwork; 1710 #endif 1711 #if defined(PETSC_MISSING_LAPACK_GESVD) 1712 PetscBLASInt Blas_one_2=1; 1713 PetscScalar *temp_basis,*correlation_mat; 1714 #else 1715 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1716 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1717 #endif 1718 /* reuse */ 1719 PetscInt olocal_primal_size; 1720 PetscInt *oprimal_indices_local_idxs; 1721 /* change of basis */ 1722 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1723 PetscBool boolforchange,qr_needed; 1724 PetscBT touched,change_basis,qr_needed_idx; 1725 /* auxiliary stuff */ 1726 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1727 PetscInt ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL; 1728 PetscScalar *temp_quadrature_constraint_work=NULL; 1729 /* some quantities */ 1730 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1731 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1732 1733 1734 PetscFunctionBegin; 1735 /* Destroy Mat objects computed previously */ 1736 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1737 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1738 /* TODO synch with adaptive selection */ 1739 /* Get index sets for faces, edges and vertices from graph */ 1740 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1741 /* free unneeded index sets */ 1742 if (!pcbddc->use_vertices) { 1743 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1744 } 1745 if (!pcbddc->use_edges) { 1746 for (i=0;i<n_ISForEdges;i++) { 1747 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1748 } 1749 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1750 n_ISForEdges = 0; 1751 } 1752 if (!pcbddc->use_faces) { 1753 for (i=0;i<n_ISForFaces;i++) { 1754 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1755 } 1756 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1757 n_ISForFaces = 0; 1758 } 1759 /* HACKS (the following two blocks of code) */ 1760 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1761 pcbddc->use_change_of_basis = PETSC_TRUE; 1762 if (!ISForEdges) { 1763 pcbddc->use_change_on_faces = PETSC_TRUE; 1764 } 1765 } 1766 if (pcbddc->NullSpace) { 1767 /* use_change_of_basis should be consistent among processors */ 1768 PetscBool tbool[2],gbool[2]; 1769 tbool [0] = pcbddc->use_change_of_basis; 1770 tbool [1] = pcbddc->use_change_on_faces; 1771 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1772 pcbddc->use_change_of_basis = gbool[0]; 1773 pcbddc->use_change_on_faces = gbool[1]; 1774 } 1775 /* print some info */ 1776 if (pcbddc->dbg_flag) { 1777 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1778 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1779 i = 0; 1780 if (ISForVertices) { 1781 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1782 } 1783 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1784 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1785 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1786 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1787 } 1788 1789 if (!pcbddc->adaptive_selection) { 1790 /* check if near null space is attached to global mat */ 1791 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1792 if (nearnullsp) { 1793 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1794 /* remove any stored info */ 1795 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1796 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1797 /* store information for BDDC solver reuse */ 1798 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1799 pcbddc->onearnullspace = nearnullsp; 1800 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1801 for (i=0;i<nnsp_size;i++) { 1802 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1803 } 1804 } else { /* if near null space is not provided BDDC uses constants by default */ 1805 nnsp_size = 0; 1806 nnsp_has_cnst = PETSC_TRUE; 1807 } 1808 /* get max number of constraints on a single cc */ 1809 max_constraints = nnsp_size; 1810 if (nnsp_has_cnst) max_constraints++; 1811 1812 /* 1813 Evaluate maximum storage size needed by the procedure 1814 - temp_indices will contain start index of each constraint stored as follows 1815 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1816 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1817 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1818 */ 1819 total_counts = n_ISForFaces+n_ISForEdges; 1820 total_counts *= max_constraints; 1821 n_vertices = 0; 1822 if (ISForVertices) { 1823 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1824 } 1825 total_counts += n_vertices; 1826 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1827 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1828 total_counts = 0; 1829 max_size_of_constraint = 0; 1830 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1831 if (i<n_ISForEdges) { 1832 used_IS = &ISForEdges[i]; 1833 } else { 1834 used_IS = &ISForFaces[i-n_ISForEdges]; 1835 } 1836 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1837 total_counts += j; 1838 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1839 } 1840 total_counts *= max_constraints; 1841 total_counts += n_vertices; 1842 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1843 /* get local part of global near null space vectors */ 1844 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1845 for (k=0;k<nnsp_size;k++) { 1846 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1847 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1848 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1849 } 1850 1851 /* whether or not to skip lapack calls */ 1852 skip_lapack = PETSC_TRUE; 1853 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1854 1855 /* allocate some auxiliary stuff */ 1856 if (!skip_lapack || pcbddc->use_qr_single) { 1857 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1858 } 1859 1860 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1861 if (!skip_lapack) { 1862 PetscScalar temp_work; 1863 1864 #if defined(PETSC_MISSING_LAPACK_GESVD) 1865 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1866 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1867 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1868 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1869 #if defined(PETSC_USE_COMPLEX) 1870 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1871 #endif 1872 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1873 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1874 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1875 lwork = -1; 1876 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1877 #if !defined(PETSC_USE_COMPLEX) 1878 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1879 #else 1880 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1881 #endif 1882 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1883 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1884 #else /* on missing GESVD */ 1885 /* SVD */ 1886 PetscInt max_n,min_n; 1887 max_n = max_size_of_constraint; 1888 min_n = max_constraints; 1889 if (max_size_of_constraint < max_constraints) { 1890 min_n = max_size_of_constraint; 1891 max_n = max_constraints; 1892 } 1893 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1894 #if defined(PETSC_USE_COMPLEX) 1895 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1896 #endif 1897 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1898 lwork = -1; 1899 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1900 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1901 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1902 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1903 #if !defined(PETSC_USE_COMPLEX) 1904 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1905 #else 1906 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1907 #endif 1908 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1909 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1910 #endif /* on missing GESVD */ 1911 /* Allocate optimal workspace */ 1912 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1913 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1914 } 1915 /* Now we can loop on constraining sets */ 1916 total_counts = 0; 1917 temp_indices[0] = 0; 1918 /* vertices */ 1919 if (ISForVertices) { 1920 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1921 if (nnsp_has_cnst) { /* consider all vertices */ 1922 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1923 for (i=0;i<n_vertices;i++) { 1924 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1925 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1926 total_counts++; 1927 } 1928 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1929 PetscBool used_vertex; 1930 for (i=0;i<n_vertices;i++) { 1931 used_vertex = PETSC_FALSE; 1932 k = 0; 1933 while (!used_vertex && k<nnsp_size) { 1934 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1935 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1936 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1937 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1938 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1939 total_counts++; 1940 used_vertex = PETSC_TRUE; 1941 } 1942 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1943 k++; 1944 } 1945 } 1946 } 1947 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1948 n_vertices = total_counts; 1949 } 1950 1951 /* edges and faces */ 1952 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1953 if (ncc<n_ISForEdges) { 1954 used_IS = &ISForEdges[ncc]; 1955 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1956 } else { 1957 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1958 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1959 } 1960 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1961 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1962 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1963 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1964 /* change of basis should not be performed on local periodic nodes */ 1965 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1966 if (nnsp_has_cnst) { 1967 PetscScalar quad_value; 1968 temp_constraints++; 1969 if (!pcbddc->use_nnsp_true) { 1970 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1971 } else { 1972 quad_value = 1.0; 1973 } 1974 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1975 for (j=0;j<size_of_constraint;j++) { 1976 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1977 } 1978 /* sort by global ordering if using lapack subroutines (not needed!) */ 1979 if (!skip_lapack || pcbddc->use_qr_single) { 1980 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1981 for (j=0;j<size_of_constraint;j++) { 1982 permutation[j]=j; 1983 } 1984 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1985 for (j=0;j<size_of_constraint;j++) { 1986 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 1987 } 1988 for (j=0;j<size_of_constraint;j++) { 1989 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1990 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1991 } 1992 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1993 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1994 } 1995 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1996 total_counts++; 1997 } 1998 for (k=0;k<nnsp_size;k++) { 1999 PetscReal real_value; 2000 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2001 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2002 for (j=0;j<size_of_constraint;j++) { 2003 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2004 } 2005 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2006 /* check if array is null on the connected component */ 2007 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2008 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2009 if (real_value > 0.0) { /* keep indices and values */ 2010 /* sort by global ordering if using lapack subroutines */ 2011 if (!skip_lapack || pcbddc->use_qr_single) { 2012 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2013 for (j=0;j<size_of_constraint;j++) { 2014 permutation[j]=j; 2015 } 2016 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2017 for (j=0;j<size_of_constraint;j++) { 2018 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2019 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2020 } 2021 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2022 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2023 } 2024 temp_constraints++; 2025 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2026 total_counts++; 2027 } 2028 } 2029 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2030 valid_constraints = temp_constraints; 2031 if (!pcbddc->use_nnsp_true && temp_constraints) { 2032 if (temp_constraints == 1) { /* just normalize the constraint */ 2033 PetscScalar norm; 2034 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2035 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2036 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2037 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2038 } else { /* perform SVD */ 2039 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2040 2041 #if defined(PETSC_MISSING_LAPACK_GESVD) 2042 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2043 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2044 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2045 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2046 from that computed using LAPACKgesvd 2047 -> This is due to a different computation of eigenvectors in LAPACKheev 2048 -> The quality of the POD-computed basis will be the same */ 2049 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2050 /* Store upper triangular part of correlation matrix */ 2051 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2052 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2053 for (j=0;j<temp_constraints;j++) { 2054 for (k=0;k<j+1;k++) { 2055 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 2056 } 2057 } 2058 /* compute eigenvalues and eigenvectors of correlation matrix */ 2059 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2060 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2061 #if !defined(PETSC_USE_COMPLEX) 2062 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2063 #else 2064 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2065 #endif 2066 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2067 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2068 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2069 j = 0; 2070 while (j < temp_constraints && singular_vals[j] < tol) j++; 2071 total_counts = total_counts-j; 2072 valid_constraints = temp_constraints-j; 2073 /* scale and copy POD basis into used quadrature memory */ 2074 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2075 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2076 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2077 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2078 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2079 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2080 if (j<temp_constraints) { 2081 PetscInt ii; 2082 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2083 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2084 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2085 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2086 for (k=0;k<temp_constraints-j;k++) { 2087 for (ii=0;ii<size_of_constraint;ii++) { 2088 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2089 } 2090 } 2091 } 2092 #else /* on missing GESVD */ 2093 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2094 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2095 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2096 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2097 #if !defined(PETSC_USE_COMPLEX) 2098 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 2099 #else 2100 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 2101 #endif 2102 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2103 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2104 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2105 k = temp_constraints; 2106 if (k > size_of_constraint) k = size_of_constraint; 2107 j = 0; 2108 while (j < k && singular_vals[k-j-1] < tol) j++; 2109 valid_constraints = k-j; 2110 total_counts = total_counts-temp_constraints+valid_constraints; 2111 #endif /* on missing GESVD */ 2112 } 2113 } 2114 /* setting change_of_basis flag is safe now */ 2115 if (boolforchange) { 2116 for (j=0;j<valid_constraints;j++) { 2117 PetscBTSet(change_basis,total_counts-j-1); 2118 } 2119 } 2120 } 2121 /* free workspace */ 2122 if (!skip_lapack || pcbddc->use_qr_single) { 2123 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2124 } 2125 if (!skip_lapack) { 2126 ierr = PetscFree(work);CHKERRQ(ierr); 2127 #if defined(PETSC_USE_COMPLEX) 2128 ierr = PetscFree(rwork);CHKERRQ(ierr); 2129 #endif 2130 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2131 #if defined(PETSC_MISSING_LAPACK_GESVD) 2132 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2133 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2134 #endif 2135 } 2136 for (k=0;k<nnsp_size;k++) { 2137 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2138 } 2139 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2140 } else { 2141 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2142 PetscInt cum = 0; 2143 2144 total_counts = 0; 2145 n_vertices = 0; 2146 if (sub_schurs->is_Ej_com) { 2147 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2148 } 2149 max_constraints = 0; 2150 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2151 total_counts += pcbddc->adaptive_constraints_n[i]; 2152 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2153 } 2154 temp_indices = pcbddc->adaptive_constraints_ptrs; 2155 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2156 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2157 2158 #if 0 2159 printf("Found %d totals\n",total_counts); 2160 for (i=0;i<total_counts;i++) { 2161 printf("const %d, start %d",i,temp_indices[i]); 2162 printf(" end %d:\n",temp_indices[i+1]); 2163 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2164 printf(" idxs %d",temp_indices_to_constraint[j]); 2165 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2166 } 2167 } 2168 for (i=0;i<n_vertices;i++) { 2169 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]); 2170 } 2171 for (i=0;i<sub_schurs->n_subs;i++) { 2172 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2173 } 2174 #endif 2175 2176 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2177 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2178 /* Change of basis */ 2179 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2180 if (pcbddc->use_change_of_basis) { 2181 cum = n_vertices; 2182 for (i=0;i<sub_schurs->n_subs;i++) { 2183 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2184 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2185 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2186 } 2187 } 2188 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2189 } 2190 } 2191 } 2192 2193 /* free index sets of faces, edges and vertices */ 2194 for (i=0;i<n_ISForFaces;i++) { 2195 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2196 } 2197 if (n_ISForFaces) { 2198 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2199 } 2200 for (i=0;i<n_ISForEdges;i++) { 2201 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2202 } 2203 if (n_ISForEdges) { 2204 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2205 } 2206 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2207 2208 /* map temp_indices_to_constraint in boundary numbering */ 2209 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2210 if (i != temp_indices[total_counts]) { 2211 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2212 } 2213 2214 /* set quantities in pcbddc data structure and store previous primal size */ 2215 /* n_vertices defines the number of subdomain corners in the primal space */ 2216 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2217 olocal_primal_size = pcbddc->local_primal_size; 2218 pcbddc->local_primal_size = total_counts; 2219 pcbddc->n_vertices = n_vertices; 2220 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2221 2222 /* Create constraint matrix */ 2223 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2224 /* so we need to set it up properly either with or without change of basis */ 2225 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2226 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2227 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2228 /* array to compute a local numbering of constraints : vertices first then constraints */ 2229 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2230 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2231 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2232 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2233 /* auxiliary stuff for basis change */ 2234 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2235 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2236 2237 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2238 total_primal_vertices=0; 2239 for (i=0;i<pcbddc->local_primal_size;i++) { 2240 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2241 if (size_of_constraint == 1) { 2242 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2243 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2244 aux_primal_minloc[total_primal_vertices]=0; 2245 total_primal_vertices++; 2246 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2247 PetscInt min_loc,min_index; 2248 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2249 /* find first untouched local node */ 2250 k = 0; 2251 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2252 min_index = global_indices[k]; 2253 min_loc = k; 2254 /* search the minimum among global nodes already untouched on the cc */ 2255 for (k=1;k<size_of_constraint;k++) { 2256 /* there can be more than one constraint on a single connected component */ 2257 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2258 min_index = global_indices[k]; 2259 min_loc = k; 2260 } 2261 } 2262 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2263 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2264 aux_primal_minloc[total_primal_vertices]=min_loc; 2265 total_primal_vertices++; 2266 } 2267 } 2268 /* determine if a QR strategy is needed for change of basis */ 2269 qr_needed = PETSC_FALSE; 2270 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2271 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2272 if (PetscBTLookup(change_basis,i)) { 2273 if (!pcbddc->use_qr_single) { 2274 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2275 j = 0; 2276 for (k=0;k<size_of_constraint;k++) { 2277 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2278 j++; 2279 } 2280 } 2281 /* found more than one primal dof on the cc */ 2282 if (j > 1) { 2283 PetscBTSet(qr_needed_idx,i); 2284 qr_needed = PETSC_TRUE; 2285 } 2286 } else { 2287 PetscBTSet(qr_needed_idx,i); 2288 qr_needed = PETSC_TRUE; 2289 } 2290 } 2291 } 2292 /* free workspace */ 2293 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2294 2295 /* permute indices in order to have a sorted set of vertices */ 2296 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2297 2298 /* nonzero structure of constraint matrix */ 2299 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2300 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2301 j=total_primal_vertices; 2302 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2303 if (!PetscBTLookup(change_basis,i)) { 2304 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2305 j++; 2306 } 2307 } 2308 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2309 ierr = PetscFree(nnz);CHKERRQ(ierr); 2310 /* set values in constraint matrix */ 2311 for (i=0;i<total_primal_vertices;i++) { 2312 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2313 } 2314 total_counts = total_primal_vertices; 2315 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2316 if (!PetscBTLookup(change_basis,i)) { 2317 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2318 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2319 total_counts++; 2320 } 2321 } 2322 /* assembling */ 2323 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2324 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2325 /* 2326 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2327 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2328 */ 2329 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2330 if (pcbddc->use_change_of_basis) { 2331 /* dual and primal dofs on a single cc */ 2332 PetscInt dual_dofs,primal_dofs; 2333 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2334 PetscInt primal_counter; 2335 /* working stuff for GEQRF */ 2336 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2337 PetscBLASInt lqr_work; 2338 /* working stuff for UNGQR */ 2339 PetscScalar *gqr_work,lgqr_work_t; 2340 PetscBLASInt lgqr_work; 2341 /* working stuff for TRTRS */ 2342 PetscScalar *trs_rhs; 2343 PetscBLASInt Blas_NRHS; 2344 /* pointers for values insertion into change of basis matrix */ 2345 PetscInt *start_rows,*start_cols; 2346 PetscScalar *start_vals; 2347 /* working stuff for values insertion */ 2348 PetscBT is_primal; 2349 /* matrix sizes */ 2350 PetscInt global_size,local_size; 2351 /* temporary change of basis */ 2352 Mat localChangeOfBasisMatrix; 2353 2354 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2355 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2356 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2357 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2358 2359 /* nonzeros for local mat */ 2360 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2361 for (i=0;i<pcis->n;i++) nnz[i]=1; 2362 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2363 if (PetscBTLookup(change_basis,i)) { 2364 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2365 if (PetscBTLookup(qr_needed_idx,i)) { 2366 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2367 } else { 2368 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2; 2369 /* get local primal index on the cc */ 2370 j = 0; 2371 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2372 nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2373 } 2374 } 2375 } 2376 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2377 ierr = PetscFree(nnz);CHKERRQ(ierr); 2378 /* Set initial identity in the matrix */ 2379 for (i=0;i<pcis->n;i++) { 2380 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2381 } 2382 2383 if (pcbddc->dbg_flag) { 2384 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2385 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2386 } 2387 2388 2389 /* Now we loop on the constraints which need a change of basis */ 2390 /* 2391 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2392 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2393 2394 Basic blocks of change of basis matrix T computed by 2395 2396 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2397 2398 | 1 0 ... 0 s_1/S | 2399 | 0 1 ... 0 s_2/S | 2400 | ... | 2401 | 0 ... 1 s_{n-1}/S | 2402 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2403 2404 with S = \sum_{i=1}^n s_i^2 2405 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2406 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2407 2408 - QR decomposition of constraints otherwise 2409 */ 2410 if (qr_needed) { 2411 /* space to store Q */ 2412 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2413 /* first we issue queries for optimal work */ 2414 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2415 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2416 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2417 lqr_work = -1; 2418 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2419 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2420 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2421 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2422 lgqr_work = -1; 2423 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2424 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2425 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2426 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2427 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2428 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2429 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2430 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2431 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2432 /* array to store scaling factors for reflectors */ 2433 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2434 /* array to store rhs and solution of triangular solver */ 2435 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2436 /* allocating workspace for check */ 2437 if (pcbddc->dbg_flag) { 2438 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2439 } 2440 } 2441 /* array to store whether a node is primal or not */ 2442 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2443 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2444 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2445 if (i != total_primal_vertices) { 2446 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2447 } 2448 for (i=0;i<total_primal_vertices;i++) { 2449 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2450 } 2451 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2452 2453 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2454 /* -> using implicit ordering contained in temp_indices data */ 2455 total_counts = pcbddc->n_vertices; 2456 primal_counter = total_counts; 2457 while (total_counts<pcbddc->local_primal_size) { 2458 primal_dofs = 1; 2459 if (PetscBTLookup(change_basis,total_counts)) { 2460 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2461 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) { 2462 primal_dofs++; 2463 } 2464 /* get constraint info */ 2465 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2466 dual_dofs = size_of_constraint-primal_dofs; 2467 2468 if (pcbddc->dbg_flag) { 2469 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2470 } 2471 2472 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2473 2474 /* copy quadrature constraints for change of basis check */ 2475 if (pcbddc->dbg_flag) { 2476 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2477 } 2478 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2479 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2480 2481 /* compute QR decomposition of constraints */ 2482 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2483 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2484 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2485 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2486 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2487 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2488 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2489 2490 /* explictly compute R^-T */ 2491 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2492 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2493 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2494 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2495 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2496 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2497 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2498 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2499 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2500 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2501 2502 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2503 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2504 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2505 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2506 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2507 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2508 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2509 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2510 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2511 2512 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2513 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2514 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2515 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2516 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2517 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2518 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2519 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2520 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2521 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2522 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2523 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2524 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2525 2526 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2527 start_rows = &temp_indices_to_constraint[temp_indices[total_counts]]; 2528 /* insert cols for primal dofs */ 2529 for (j=0;j<primal_dofs;j++) { 2530 start_vals = &qr_basis[j*size_of_constraint]; 2531 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2532 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2533 } 2534 /* insert cols for dual dofs */ 2535 for (j=0,k=0;j<dual_dofs;k++) { 2536 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2537 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2538 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k]; 2539 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2540 j++; 2541 } 2542 } 2543 2544 /* check change of basis */ 2545 if (pcbddc->dbg_flag) { 2546 PetscInt ii,jj; 2547 PetscBool valid_qr=PETSC_TRUE; 2548 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2549 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2550 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2551 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2552 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2553 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2554 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2555 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2556 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2557 for (jj=0;jj<size_of_constraint;jj++) { 2558 for (ii=0;ii<primal_dofs;ii++) { 2559 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2560 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2561 } 2562 } 2563 if (!valid_qr) { 2564 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2565 for (jj=0;jj<size_of_constraint;jj++) { 2566 for (ii=0;ii<primal_dofs;ii++) { 2567 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2568 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2569 } 2570 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2571 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2572 } 2573 } 2574 } 2575 } else { 2576 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2577 } 2578 } 2579 } else { /* simple transformation block */ 2580 PetscInt row,col; 2581 PetscScalar val,norm; 2582 2583 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2584 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2585 for (j=0;j<size_of_constraint;j++) { 2586 PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2587 row = temp_indices_to_constraint[temp_indices[total_counts]+j]; 2588 if (!PetscBTLookup(is_primal,row_B)) { 2589 col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2590 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2591 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2592 } else { 2593 for (k=0;k<size_of_constraint;k++) { 2594 col = temp_indices_to_constraint[temp_indices[total_counts]+k]; 2595 if (row != col) { 2596 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2597 } else { 2598 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2599 } 2600 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2601 } 2602 } 2603 } 2604 if (pcbddc->dbg_flag) { 2605 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2606 } 2607 } 2608 /* increment primal counter */ 2609 primal_counter += primal_dofs; 2610 } else { 2611 if (pcbddc->dbg_flag) { 2612 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2613 } 2614 } 2615 /* increment constraint counter total_counts */ 2616 total_counts += primal_dofs; 2617 } 2618 2619 /* free workspace */ 2620 if (qr_needed) { 2621 if (pcbddc->dbg_flag) { 2622 ierr = PetscFree(work);CHKERRQ(ierr); 2623 } 2624 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2625 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2626 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2627 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2628 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2629 } 2630 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2631 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2632 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2633 2634 /* assembling of global change of variable */ 2635 { 2636 Mat tmat; 2637 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2638 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2639 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2640 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2641 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2642 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2643 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2644 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2645 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2646 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2647 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2648 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2649 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2650 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2651 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2652 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2653 } 2654 /* check */ 2655 if (pcbddc->dbg_flag) { 2656 PetscReal error; 2657 Vec x,x_change; 2658 2659 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2660 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2661 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2662 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2663 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2664 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2665 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2666 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2667 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2668 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2669 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2670 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2671 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2672 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2673 ierr = VecDestroy(&x);CHKERRQ(ierr); 2674 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2675 } 2676 2677 /* adapt sub_schurs computed (if any) */ 2678 if (pcbddc->use_deluxe_scaling) { 2679 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2680 if (sub_schurs->n_subs_par_g) { 2681 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented"); 2682 } 2683 if (sub_schurs->S_Ej_all) { 2684 Mat S_1,S_2,tmat; 2685 IS is_all_N; 2686 2687 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2688 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2689 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2690 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2691 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2692 sub_schurs->S_Ej_all = S_1; 2693 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2694 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2695 sub_schurs->sum_S_Ej_all = S_2; 2696 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2697 } 2698 } 2699 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2700 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2701 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2702 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2703 } 2704 2705 /* set up change of basis context */ 2706 if (pcbddc->ChangeOfBasisMatrix) { 2707 PCBDDCChange_ctx change_ctx; 2708 2709 if (!pcbddc->new_global_mat) { 2710 PetscInt global_size,local_size; 2711 2712 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2713 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2714 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2715 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2716 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2717 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2718 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2719 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2720 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2721 } else { 2722 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2723 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2724 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2725 } 2726 if (!pcbddc->user_ChangeOfBasisMatrix) { 2727 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2728 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2729 } else { 2730 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2731 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2732 } 2733 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2734 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2735 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2736 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2737 } 2738 2739 /* get indices in local ordering for vertices and constraints */ 2740 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2741 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2742 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2743 } 2744 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2745 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2746 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2747 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2748 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2749 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2750 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2751 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2752 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2753 /* set quantities in PCBDDC data struct */ 2754 pcbddc->n_actual_vertices = i; 2755 /* check if a new primal space has been introduced */ 2756 pcbddc->new_primal_space_local = PETSC_TRUE; 2757 if (olocal_primal_size == pcbddc->local_primal_size) { 2758 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2759 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2760 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2761 } 2762 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2763 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2764 2765 /* flush dbg viewer */ 2766 if (pcbddc->dbg_flag) { 2767 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2768 } 2769 2770 /* free workspace */ 2771 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2772 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2773 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2774 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2775 if (!pcbddc->adaptive_selection) { 2776 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2777 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2778 } else { 2779 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2780 pcbddc->adaptive_constraints_ptrs, 2781 pcbddc->adaptive_constraints_idxs, 2782 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2783 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2784 } 2785 PetscFunctionReturn(0); 2786 } 2787 2788 #undef __FUNCT__ 2789 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2790 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2791 { 2792 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2793 PC_IS *pcis = (PC_IS*)pc->data; 2794 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2795 PetscInt ierr,i,vertex_size; 2796 PetscViewer viewer=pcbddc->dbg_viewer; 2797 2798 PetscFunctionBegin; 2799 /* Reset previously computed graph */ 2800 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2801 /* Init local Graph struct */ 2802 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2803 2804 /* Check validity of the csr graph passed in by the user */ 2805 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2806 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2807 } 2808 2809 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2810 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2811 Mat mat_adj; 2812 PetscInt *xadj,*adjncy; 2813 PetscInt nvtxs; 2814 PetscBool flg_row=PETSC_TRUE; 2815 2816 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2817 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2818 if (!flg_row) { 2819 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2820 } 2821 if (pcbddc->use_local_adj) { 2822 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2823 pcbddc->computed_rowadj = PETSC_TRUE; 2824 } else { /* just compute subdomain's connected components */ 2825 IS is_dummy; 2826 ISLocalToGlobalMapping l2gmap_dummy; 2827 PetscInt j,sum; 2828 PetscInt *cxadj,*cadjncy; 2829 const PetscInt *idxs; 2830 PCBDDCGraph graph; 2831 PetscBT is_on_boundary; 2832 2833 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2834 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2835 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2836 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2837 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2838 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2839 graph->xadj = xadj; 2840 graph->adjncy = adjncy; 2841 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2842 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2843 2844 if (pcbddc->dbg_flag) { 2845 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2846 for (i=0;i<graph->ncc;i++) { 2847 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2848 } 2849 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2850 } 2851 2852 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2853 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2854 for (i=0;i<pcis->n_B;i++) { 2855 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2856 } 2857 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2858 2859 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2860 sum = 0; 2861 for (i=0;i<graph->ncc;i++) { 2862 PetscInt sizecc = 0; 2863 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2864 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2865 sizecc++; 2866 } 2867 } 2868 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2869 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2870 cxadj[graph->queue[j]] = sizecc; 2871 } 2872 } 2873 sum += sizecc*sizecc; 2874 } 2875 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2876 sum = 0; 2877 for (i=0;i<nvtxs;i++) { 2878 PetscInt temp = cxadj[i]; 2879 cxadj[i] = sum; 2880 sum += temp; 2881 } 2882 cxadj[nvtxs] = sum; 2883 for (i=0;i<graph->ncc;i++) { 2884 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2885 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2886 PetscInt k,sizecc = 0; 2887 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2888 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2889 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2890 sizecc++; 2891 } 2892 } 2893 } 2894 } 2895 } 2896 if (nvtxs) { 2897 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2898 } else { 2899 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2900 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2901 } 2902 graph->xadj = 0; 2903 graph->adjncy = 0; 2904 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2905 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2906 } 2907 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2908 if (!flg_row) { 2909 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2910 } 2911 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2912 } 2913 2914 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2915 vertex_size = 1; 2916 if (pcbddc->user_provided_isfordofs) { 2917 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2918 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2919 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2920 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2921 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2922 } 2923 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2924 pcbddc->n_ISForDofs = 0; 2925 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2926 } 2927 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2928 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2929 } else { 2930 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2931 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2932 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2933 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2934 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2935 } 2936 } 2937 } 2938 2939 /* Setup of Graph */ 2940 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2941 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2942 } 2943 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2944 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2945 } 2946 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2947 2948 /* Graph's connected components analysis */ 2949 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2950 2951 /* print some info to stdout */ 2952 if (pcbddc->dbg_flag) { 2953 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2954 } 2955 2956 /* mark topography has done */ 2957 pcbddc->recompute_topography = PETSC_FALSE; 2958 PetscFunctionReturn(0); 2959 } 2960 2961 #undef __FUNCT__ 2962 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2963 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2964 { 2965 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2966 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2967 PetscErrorCode ierr; 2968 2969 PetscFunctionBegin; 2970 n = 0; 2971 vertices = 0; 2972 if (pcbddc->ConstraintMatrix) { 2973 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2974 for (i=0;i<local_primal_size;i++) { 2975 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2976 if (size_of_constraint == 1) n++; 2977 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2978 } 2979 if (vertices_idx) { 2980 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2981 n = 0; 2982 for (i=0;i<local_primal_size;i++) { 2983 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2984 if (size_of_constraint == 1) { 2985 vertices[n++]=row_cmat_indices[0]; 2986 } 2987 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2988 } 2989 } 2990 } 2991 *n_vertices = n; 2992 if (vertices_idx) *vertices_idx = vertices; 2993 PetscFunctionReturn(0); 2994 } 2995 2996 #undef __FUNCT__ 2997 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2998 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2999 { 3000 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3001 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 3002 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 3003 PetscBT touched; 3004 PetscErrorCode ierr; 3005 3006 /* This function assumes that the number of local constraints per connected component 3007 is not greater than the number of nodes defined for the connected component 3008 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3009 PetscFunctionBegin; 3010 n = 0; 3011 constraints_index = 0; 3012 if (pcbddc->ConstraintMatrix) { 3013 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3014 max_size_of_constraint = 0; 3015 for (i=0;i<local_primal_size;i++) { 3016 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3017 if (size_of_constraint > 1) { 3018 n++; 3019 } 3020 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3021 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3022 } 3023 if (constraints_idx) { 3024 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3025 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3026 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3027 n = 0; 3028 for (i=0;i<local_primal_size;i++) { 3029 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3030 if (size_of_constraint > 1) { 3031 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3032 /* find first untouched local node */ 3033 j = 0; 3034 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3035 min_index = row_cmat_global_indices[j]; 3036 min_loc = j; 3037 /* search the minimum among nodes not yet touched on the connected component 3038 since there can be more than one constraint on a single cc */ 3039 for (j=1;j<size_of_constraint;j++) { 3040 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3041 min_index = row_cmat_global_indices[j]; 3042 min_loc = j; 3043 } 3044 } 3045 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3046 constraints_index[n++] = row_cmat_indices[min_loc]; 3047 } 3048 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3049 } 3050 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3051 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3052 } 3053 } 3054 *n_constraints = n; 3055 if (constraints_idx) *constraints_idx = constraints_index; 3056 PetscFunctionReturn(0); 3057 } 3058 3059 #undef __FUNCT__ 3060 #define __FUNCT__ "PCBDDCSubsetNumbering" 3061 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3062 { 3063 Vec local_vec,global_vec; 3064 IS seqis,paris; 3065 VecScatter scatter_ctx; 3066 PetscScalar *array; 3067 PetscInt *temp_global_dofs; 3068 PetscScalar globalsum; 3069 PetscInt i,j,s; 3070 PetscInt nlocals,first_index,old_index,max_local; 3071 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 3072 PetscMPIInt *dof_sizes,*dof_displs; 3073 PetscBool first_found; 3074 PetscErrorCode ierr; 3075 3076 PetscFunctionBegin; 3077 /* mpi buffers */ 3078 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3079 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3080 j = ( !rank_prec_comm ? size_prec_comm : 0); 3081 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 3082 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 3083 /* get maximum size of subset */ 3084 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3085 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3086 max_local = 0; 3087 for (i=0;i<n_local_dofs;i++) { 3088 if (max_local < temp_global_dofs[i] ) { 3089 max_local = temp_global_dofs[i]; 3090 } 3091 } 3092 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3093 max_global++; 3094 max_local = 0; 3095 for (i=0;i<n_local_dofs;i++) { 3096 if (max_local < local_dofs[i] ) { 3097 max_local = local_dofs[i]; 3098 } 3099 } 3100 max_local++; 3101 /* allocate workspace */ 3102 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3103 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3104 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3105 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3106 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3107 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3108 /* create scatter */ 3109 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3110 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3111 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3112 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3113 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3114 /* init array */ 3115 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3116 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3117 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3118 if (local_dofs_mult) { 3119 for (i=0;i<n_local_dofs;i++) { 3120 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3121 } 3122 } else { 3123 for (i=0;i<n_local_dofs;i++) { 3124 array[local_dofs[i]]=1.0; 3125 } 3126 } 3127 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3128 /* scatter into global vec and get total number of global dofs */ 3129 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3130 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3131 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3132 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3133 /* Fill global_vec with cumulative function for global numbering */ 3134 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3135 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3136 nlocals = 0; 3137 first_index = -1; 3138 first_found = PETSC_FALSE; 3139 for (i=0;i<s;i++) { 3140 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3141 first_found = PETSC_TRUE; 3142 first_index = i; 3143 } 3144 nlocals += (PetscInt)PetscRealPart(array[i]); 3145 } 3146 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3147 if (!rank_prec_comm) { 3148 dof_displs[0]=0; 3149 for (i=1;i<size_prec_comm;i++) { 3150 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3151 } 3152 } 3153 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3154 if (first_found) { 3155 array[first_index] += (PetscScalar)nlocals; 3156 old_index = first_index; 3157 for (i=first_index+1;i<s;i++) { 3158 if (PetscRealPart(array[i]) > 0.1) { 3159 array[i] += array[old_index]; 3160 old_index = i; 3161 } 3162 } 3163 } 3164 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3165 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3166 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3167 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3168 /* get global ordering of local dofs */ 3169 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3170 if (local_dofs_mult) { 3171 for (i=0;i<n_local_dofs;i++) { 3172 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3173 } 3174 } else { 3175 for (i=0;i<n_local_dofs;i++) { 3176 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3177 } 3178 } 3179 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3180 /* free workspace */ 3181 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3182 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3183 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3184 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3185 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3186 /* return pointer to global ordering of local dofs */ 3187 *global_numbering_subset = temp_global_dofs; 3188 PetscFunctionReturn(0); 3189 } 3190 3191 #undef __FUNCT__ 3192 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3193 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3194 { 3195 PetscInt i,j; 3196 PetscScalar *alphas; 3197 PetscErrorCode ierr; 3198 3199 PetscFunctionBegin; 3200 /* this implements stabilized Gram-Schmidt */ 3201 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3202 for (i=0;i<n;i++) { 3203 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3204 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3205 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3206 } 3207 ierr = PetscFree(alphas);CHKERRQ(ierr); 3208 PetscFunctionReturn(0); 3209 } 3210 3211 #undef __FUNCT__ 3212 #define __FUNCT__ "MatISGetSubassemblingPattern" 3213 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3214 { 3215 Mat subdomain_adj; 3216 IS new_ranks,ranks_send_to; 3217 MatPartitioning partitioner; 3218 Mat_IS *matis; 3219 PetscInt n_neighs,*neighs,*n_shared,**shared; 3220 PetscInt prank; 3221 PetscMPIInt size,rank,color; 3222 PetscInt *xadj,*adjncy,*oldranks; 3223 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3224 PetscInt i,local_size,threshold=0; 3225 PetscErrorCode ierr; 3226 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3227 PetscSubcomm subcomm; 3228 3229 PetscFunctionBegin; 3230 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3231 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3232 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3233 3234 /* Get info on mapping */ 3235 matis = (Mat_IS*)(mat->data); 3236 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3237 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3238 3239 /* build local CSR graph of subdomains' connectivity */ 3240 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3241 xadj[0] = 0; 3242 xadj[1] = PetscMax(n_neighs-1,0); 3243 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3244 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3245 3246 if (threshold) { 3247 PetscInt xadj_count = 0; 3248 for (i=1;i<n_neighs;i++) { 3249 if (n_shared[i] > threshold) { 3250 adjncy[xadj_count] = neighs[i]; 3251 adjncy_wgt[xadj_count] = n_shared[i]; 3252 xadj_count++; 3253 } 3254 } 3255 xadj[1] = xadj_count; 3256 } else { 3257 if (xadj[1]) { 3258 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3259 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3260 } 3261 } 3262 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3263 if (use_square) { 3264 for (i=0;i<xadj[1];i++) { 3265 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3266 } 3267 } 3268 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3269 3270 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3271 3272 /* 3273 Restrict work on active processes only. 3274 */ 3275 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3276 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3277 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3278 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3279 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3280 if (color) { 3281 ierr = PetscFree(xadj);CHKERRQ(ierr); 3282 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3283 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3284 } else { 3285 PetscInt coarsening_ratio; 3286 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 3287 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3288 prank = rank; 3289 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 3290 /* 3291 for (i=0;i<size;i++) { 3292 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3293 } 3294 */ 3295 for (i=0;i<xadj[1];i++) { 3296 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3297 } 3298 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3299 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3300 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3301 3302 /* Partition */ 3303 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 3304 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3305 if (use_vwgt) { 3306 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3307 v_wgt[0] = local_size; 3308 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3309 } 3310 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3311 coarsening_ratio = size/n_subdomains; 3312 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3313 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3314 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3315 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3316 3317 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3318 if (contiguous) { 3319 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3320 } else { 3321 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3322 } 3323 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3324 /* clean up */ 3325 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3326 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3327 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3328 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3329 } 3330 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3331 3332 /* assemble parallel IS for sends */ 3333 i = 1; 3334 if (color) i=0; 3335 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3336 3337 /* get back IS */ 3338 *is_sends = ranks_send_to; 3339 PetscFunctionReturn(0); 3340 } 3341 3342 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3343 3344 #undef __FUNCT__ 3345 #define __FUNCT__ "MatISSubassemble" 3346 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3347 { 3348 Mat local_mat; 3349 Mat_IS *matis; 3350 IS is_sends_internal; 3351 PetscInt rows,cols,new_local_rows; 3352 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3353 PetscBool ismatis,isdense,newisdense,destroy_mat; 3354 ISLocalToGlobalMapping l2gmap; 3355 PetscInt* l2gmap_indices; 3356 const PetscInt* is_indices; 3357 MatType new_local_type; 3358 /* buffers */ 3359 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3360 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3361 PetscInt *recv_buffer_idxs_local; 3362 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3363 /* MPI */ 3364 MPI_Comm comm,comm_n; 3365 PetscSubcomm subcomm; 3366 PetscMPIInt n_sends,n_recvs,commsize; 3367 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3368 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3369 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3370 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3371 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3372 PetscErrorCode ierr; 3373 3374 PetscFunctionBegin; 3375 /* TODO: add missing checks */ 3376 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3377 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3378 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3379 PetscValidLogicalCollectiveInt(mat,nis,7); 3380 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3381 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3382 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3383 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3384 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3385 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3386 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3387 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3388 PetscInt mrows,mcols,mnrows,mncols; 3389 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3390 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3391 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3392 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3393 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3394 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3395 } 3396 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3397 PetscValidLogicalCollectiveInt(mat,bs,0); 3398 /* prepare IS for sending if not provided */ 3399 if (!is_sends) { 3400 PetscBool pcontig = PETSC_TRUE; 3401 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3402 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3403 } else { 3404 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3405 is_sends_internal = is_sends; 3406 } 3407 3408 /* get pointer of MATIS data */ 3409 matis = (Mat_IS*)mat->data; 3410 3411 /* get comm */ 3412 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3413 3414 /* compute number of sends */ 3415 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3416 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3417 3418 /* compute number of receives */ 3419 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3420 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3421 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3422 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3423 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3424 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3425 ierr = PetscFree(iflags);CHKERRQ(ierr); 3426 3427 /* restrict comm if requested */ 3428 subcomm = 0; 3429 destroy_mat = PETSC_FALSE; 3430 if (restrict_comm) { 3431 PetscMPIInt color,rank,subcommsize; 3432 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3433 color = 0; 3434 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3435 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3436 subcommsize = commsize - subcommsize; 3437 /* check if reuse has been requested */ 3438 if (reuse == MAT_REUSE_MATRIX) { 3439 if (*mat_n) { 3440 PetscMPIInt subcommsize2; 3441 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3442 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3443 comm_n = PetscObjectComm((PetscObject)*mat_n); 3444 } else { 3445 comm_n = PETSC_COMM_SELF; 3446 } 3447 } else { /* MAT_INITIAL_MATRIX */ 3448 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3449 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3450 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3451 comm_n = subcomm->comm; 3452 } 3453 /* flag to destroy *mat_n if not significative */ 3454 if (color) destroy_mat = PETSC_TRUE; 3455 } else { 3456 comm_n = comm; 3457 } 3458 3459 /* prepare send/receive buffers */ 3460 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3461 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3462 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3463 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3464 if (nis) { 3465 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3466 } 3467 3468 /* Get data from local matrices */ 3469 if (!isdense) { 3470 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3471 /* TODO: See below some guidelines on how to prepare the local buffers */ 3472 /* 3473 send_buffer_vals should contain the raw values of the local matrix 3474 send_buffer_idxs should contain: 3475 - MatType_PRIVATE type 3476 - PetscInt size_of_l2gmap 3477 - PetscInt global_row_indices[size_of_l2gmap] 3478 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3479 */ 3480 } else { 3481 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3482 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3483 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3484 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3485 send_buffer_idxs[1] = i; 3486 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3487 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3488 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3489 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3490 for (i=0;i<n_sends;i++) { 3491 ilengths_vals[is_indices[i]] = len*len; 3492 ilengths_idxs[is_indices[i]] = len+2; 3493 } 3494 } 3495 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3496 /* additional is (if any) */ 3497 if (nis) { 3498 PetscMPIInt psum; 3499 PetscInt j; 3500 for (j=0,psum=0;j<nis;j++) { 3501 PetscInt plen; 3502 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3503 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3504 psum += len+1; /* indices + lenght */ 3505 } 3506 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3507 for (j=0,psum=0;j<nis;j++) { 3508 PetscInt plen; 3509 const PetscInt *is_array_idxs; 3510 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3511 send_buffer_idxs_is[psum] = plen; 3512 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3513 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3514 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3515 psum += plen+1; /* indices + lenght */ 3516 } 3517 for (i=0;i<n_sends;i++) { 3518 ilengths_idxs_is[is_indices[i]] = psum; 3519 } 3520 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3521 } 3522 3523 buf_size_idxs = 0; 3524 buf_size_vals = 0; 3525 buf_size_idxs_is = 0; 3526 for (i=0;i<n_recvs;i++) { 3527 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3528 buf_size_vals += (PetscInt)olengths_vals[i]; 3529 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3530 } 3531 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3532 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3533 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3534 3535 /* get new tags for clean communications */ 3536 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3537 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3538 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3539 3540 /* allocate for requests */ 3541 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3542 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3543 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3544 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3545 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3546 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3547 3548 /* communications */ 3549 ptr_idxs = recv_buffer_idxs; 3550 ptr_vals = recv_buffer_vals; 3551 ptr_idxs_is = recv_buffer_idxs_is; 3552 for (i=0;i<n_recvs;i++) { 3553 source_dest = onodes[i]; 3554 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3555 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3556 ptr_idxs += olengths_idxs[i]; 3557 ptr_vals += olengths_vals[i]; 3558 if (nis) { 3559 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3560 ptr_idxs_is += olengths_idxs_is[i]; 3561 } 3562 } 3563 for (i=0;i<n_sends;i++) { 3564 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3565 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3566 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3567 if (nis) { 3568 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3569 } 3570 } 3571 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3572 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3573 3574 /* assemble new l2g map */ 3575 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3576 ptr_idxs = recv_buffer_idxs; 3577 new_local_rows = 0; 3578 for (i=0;i<n_recvs;i++) { 3579 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3580 ptr_idxs += olengths_idxs[i]; 3581 } 3582 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3583 ptr_idxs = recv_buffer_idxs; 3584 new_local_rows = 0; 3585 for (i=0;i<n_recvs;i++) { 3586 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3587 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3588 ptr_idxs += olengths_idxs[i]; 3589 } 3590 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3591 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3592 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3593 3594 /* infer new local matrix type from received local matrices type */ 3595 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3596 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3597 if (n_recvs) { 3598 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3599 ptr_idxs = recv_buffer_idxs; 3600 for (i=0;i<n_recvs;i++) { 3601 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3602 new_local_type_private = MATAIJ_PRIVATE; 3603 break; 3604 } 3605 ptr_idxs += olengths_idxs[i]; 3606 } 3607 switch (new_local_type_private) { 3608 case MATDENSE_PRIVATE: 3609 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3610 new_local_type = MATSEQAIJ; 3611 bs = 1; 3612 } else { /* if I receive only 1 dense matrix */ 3613 new_local_type = MATSEQDENSE; 3614 bs = 1; 3615 } 3616 break; 3617 case MATAIJ_PRIVATE: 3618 new_local_type = MATSEQAIJ; 3619 bs = 1; 3620 break; 3621 case MATBAIJ_PRIVATE: 3622 new_local_type = MATSEQBAIJ; 3623 break; 3624 case MATSBAIJ_PRIVATE: 3625 new_local_type = MATSEQSBAIJ; 3626 break; 3627 default: 3628 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3629 break; 3630 } 3631 } else { /* by default, new_local_type is seqdense */ 3632 new_local_type = MATSEQDENSE; 3633 bs = 1; 3634 } 3635 3636 /* create MATIS object if needed */ 3637 if (reuse == MAT_INITIAL_MATRIX) { 3638 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3639 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3640 } else { 3641 /* it also destroys the local matrices */ 3642 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3643 } 3644 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3645 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3646 3647 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3648 3649 /* Global to local map of received indices */ 3650 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3651 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3652 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3653 3654 /* restore attributes -> type of incoming data and its size */ 3655 buf_size_idxs = 0; 3656 for (i=0;i<n_recvs;i++) { 3657 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3658 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3659 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3660 } 3661 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3662 3663 /* set preallocation */ 3664 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3665 if (!newisdense) { 3666 PetscInt *new_local_nnz=0; 3667 3668 ptr_vals = recv_buffer_vals; 3669 ptr_idxs = recv_buffer_idxs_local; 3670 if (n_recvs) { 3671 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3672 } 3673 for (i=0;i<n_recvs;i++) { 3674 PetscInt j; 3675 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3676 for (j=0;j<*(ptr_idxs+1);j++) { 3677 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3678 } 3679 } else { 3680 /* TODO */ 3681 } 3682 ptr_idxs += olengths_idxs[i]; 3683 } 3684 if (new_local_nnz) { 3685 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3686 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3687 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3688 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3689 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3690 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3691 } else { 3692 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3693 } 3694 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3695 } else { 3696 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3697 } 3698 3699 /* set values */ 3700 ptr_vals = recv_buffer_vals; 3701 ptr_idxs = recv_buffer_idxs_local; 3702 for (i=0;i<n_recvs;i++) { 3703 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3704 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3705 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3706 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3707 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3708 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3709 } else { 3710 /* TODO */ 3711 } 3712 ptr_idxs += olengths_idxs[i]; 3713 ptr_vals += olengths_vals[i]; 3714 } 3715 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3716 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3717 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3718 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3719 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3720 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3721 3722 #if 0 3723 if (!restrict_comm) { /* check */ 3724 Vec lvec,rvec; 3725 PetscReal infty_error; 3726 3727 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3728 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3729 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3730 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3731 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3732 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3733 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3734 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3735 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3736 } 3737 #endif 3738 3739 /* assemble new additional is (if any) */ 3740 if (nis) { 3741 PetscInt **temp_idxs,*count_is,j,psum; 3742 3743 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3744 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3745 ptr_idxs = recv_buffer_idxs_is; 3746 psum = 0; 3747 for (i=0;i<n_recvs;i++) { 3748 for (j=0;j<nis;j++) { 3749 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3750 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3751 psum += plen; 3752 ptr_idxs += plen+1; /* shift pointer to received data */ 3753 } 3754 } 3755 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3756 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3757 for (i=1;i<nis;i++) { 3758 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3759 } 3760 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3761 ptr_idxs = recv_buffer_idxs_is; 3762 for (i=0;i<n_recvs;i++) { 3763 for (j=0;j<nis;j++) { 3764 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3765 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3766 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3767 ptr_idxs += plen+1; /* shift pointer to received data */ 3768 } 3769 } 3770 for (i=0;i<nis;i++) { 3771 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3772 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3773 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3774 } 3775 ierr = PetscFree(count_is);CHKERRQ(ierr); 3776 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3777 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3778 } 3779 /* free workspace */ 3780 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3781 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3782 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3783 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3784 if (isdense) { 3785 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3786 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3787 } else { 3788 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3789 } 3790 if (nis) { 3791 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3792 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3793 } 3794 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3795 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3796 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3797 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3798 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3799 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3800 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3801 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3802 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3803 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3804 ierr = PetscFree(onodes);CHKERRQ(ierr); 3805 if (nis) { 3806 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3807 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3808 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3809 } 3810 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3811 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3812 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3813 for (i=0;i<nis;i++) { 3814 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3815 } 3816 } 3817 PetscFunctionReturn(0); 3818 } 3819 3820 /* temporary hack into ksp private data structure */ 3821 #include <petsc-private/kspimpl.h> 3822 3823 #undef __FUNCT__ 3824 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3825 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3826 { 3827 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3828 PC_IS *pcis = (PC_IS*)pc->data; 3829 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3830 MatNullSpace CoarseNullSpace=NULL; 3831 ISLocalToGlobalMapping coarse_islg; 3832 IS coarse_is,*isarray; 3833 PetscInt i,im_active=-1,active_procs=-1; 3834 PetscInt nis,nisdofs,nisneu; 3835 PC pc_temp; 3836 PCType coarse_pc_type; 3837 KSPType coarse_ksp_type; 3838 PetscBool multilevel_requested,multilevel_allowed; 3839 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3840 Mat t_coarse_mat_is; 3841 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3842 PetscMPIInt all_procs; 3843 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3844 PetscBool compute_vecs = PETSC_FALSE; 3845 PetscScalar *array; 3846 PetscErrorCode ierr; 3847 3848 PetscFunctionBegin; 3849 /* Assign global numbering to coarse dofs */ 3850 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3851 compute_vecs = PETSC_TRUE; 3852 PetscInt ocoarse_size; 3853 ocoarse_size = pcbddc->coarse_size; 3854 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3855 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3856 /* see if we can avoid some work */ 3857 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3858 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3859 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3860 coarse_reuse = PETSC_FALSE; 3861 } else { /* we can safely reuse already computed coarse matrix */ 3862 coarse_reuse = PETSC_TRUE; 3863 } 3864 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3865 coarse_reuse = PETSC_FALSE; 3866 } 3867 /* reset any subassembling information */ 3868 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3869 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3870 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3871 coarse_reuse = PETSC_TRUE; 3872 } 3873 3874 /* count "active" (i.e. with positive local size) and "void" processes */ 3875 im_active = !!(pcis->n); 3876 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3877 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3878 void_procs = all_procs-active_procs; 3879 csin_type_simple = PETSC_TRUE; 3880 redist = PETSC_FALSE; 3881 if (pcbddc->current_level && void_procs) { 3882 csin_ml = PETSC_TRUE; 3883 ncoarse_ml = void_procs; 3884 csin_ds = PETSC_TRUE; 3885 ncoarse_ds = void_procs; 3886 } else { 3887 csin_ml = PETSC_FALSE; 3888 ncoarse_ml = all_procs; 3889 if (void_procs) { 3890 csin_ds = PETSC_TRUE; 3891 ncoarse_ds = void_procs; 3892 csin_type_simple = PETSC_FALSE; 3893 } else { 3894 if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) { 3895 csin_ds = PETSC_TRUE; 3896 ncoarse_ds = pcbddc->redistribute_coarse; 3897 redist = PETSC_TRUE; 3898 } else { 3899 csin_ds = PETSC_FALSE; 3900 ncoarse_ds = all_procs; 3901 } 3902 } 3903 } 3904 3905 /* 3906 test if we can go multilevel: three conditions must be satisfied: 3907 - we have not exceeded the number of levels requested 3908 - we can actually subassemble the active processes 3909 - we can find a suitable number of MPI processes where we can place the subassembled problem 3910 */ 3911 multilevel_allowed = PETSC_FALSE; 3912 multilevel_requested = PETSC_FALSE; 3913 if (pcbddc->current_level < pcbddc->max_levels) { 3914 multilevel_requested = PETSC_TRUE; 3915 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3916 multilevel_allowed = PETSC_FALSE; 3917 } else { 3918 multilevel_allowed = PETSC_TRUE; 3919 } 3920 } 3921 /* determine number of process partecipating to coarse solver */ 3922 if (multilevel_allowed) { 3923 ncoarse = ncoarse_ml; 3924 csin = csin_ml; 3925 } else { 3926 ncoarse = ncoarse_ds; 3927 csin = csin_ds; 3928 } 3929 3930 /* creates temporary l2gmap and IS for coarse indexes */ 3931 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3932 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3933 3934 /* creates temporary MATIS object for coarse matrix */ 3935 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3936 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3937 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3938 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3939 #if 0 3940 { 3941 PetscViewer viewer; 3942 char filename[256]; 3943 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3944 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3945 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3946 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3947 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3948 } 3949 #endif 3950 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3951 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3952 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3953 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3954 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3955 3956 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3957 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3958 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3959 const PetscInt *idxs; 3960 ISLocalToGlobalMapping tmap; 3961 3962 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3963 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3964 /* allocate space for temporary storage */ 3965 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3966 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3967 /* allocate for IS array */ 3968 nisdofs = pcbddc->n_ISForDofsLocal; 3969 nisneu = !!pcbddc->NeumannBoundariesLocal; 3970 nis = nisdofs + nisneu; 3971 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3972 /* dofs splitting */ 3973 for (i=0;i<nisdofs;i++) { 3974 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3975 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3976 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3977 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3978 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3979 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3980 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3981 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3982 } 3983 /* neumann boundaries */ 3984 if (pcbddc->NeumannBoundariesLocal) { 3985 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3986 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3987 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3988 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3989 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3990 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3991 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3992 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3993 } 3994 /* free memory */ 3995 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3996 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3997 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3998 } else { 3999 nis = 0; 4000 nisdofs = 0; 4001 nisneu = 0; 4002 isarray = NULL; 4003 } 4004 /* destroy no longer needed map */ 4005 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4006 4007 /* restrict on coarse candidates (if needed) */ 4008 coarse_mat_is = NULL; 4009 if (csin) { 4010 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4011 if (redist) { 4012 PetscMPIInt rank; 4013 PetscInt spc,n_spc_p1,dest[1]; 4014 4015 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4016 spc = all_procs/pcbddc->redistribute_coarse; 4017 n_spc_p1 = all_procs%pcbddc->redistribute_coarse; 4018 if (rank > n_spc_p1*(spc+1)-1) { 4019 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4020 } else { 4021 dest[0] = rank/(spc+1); 4022 } 4023 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4024 } else { 4025 PetscInt j,tissize,*nisindices; 4026 PetscInt *coarse_candidates; 4027 const PetscInt* tisindices; 4028 /* get coarse candidates' ranks in pc communicator */ 4029 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4030 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4031 for (i=0,j=0;i<all_procs;i++) { 4032 if (!coarse_candidates[i]) { 4033 coarse_candidates[j]=i; 4034 j++; 4035 } 4036 } 4037 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 4038 /* get a suitable subassembling pattern */ 4039 if (csin_type_simple) { 4040 PetscMPIInt rank; 4041 PetscInt issize,isidx; 4042 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4043 if (im_active) { 4044 issize = 1; 4045 isidx = (PetscInt)rank; 4046 } else { 4047 issize = 0; 4048 isidx = -1; 4049 } 4050 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4051 } else { 4052 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4053 } 4054 if (pcbddc->dbg_flag) { 4055 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4056 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4057 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4058 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4059 for (i=0;i<j;i++) { 4060 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4061 } 4062 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4063 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4064 } 4065 /* shift the pattern on coarse candidates */ 4066 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4067 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4068 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4069 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4070 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4071 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4072 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4073 } 4074 } 4075 if (pcbddc->dbg_flag) { 4076 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4077 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4078 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4079 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4080 } 4081 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4082 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4083 } else { 4084 if (pcbddc->dbg_flag) { 4085 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4086 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4087 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4088 } 4089 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4090 coarse_mat_is = t_coarse_mat_is; 4091 } 4092 4093 /* create local to global scatters for coarse problem */ 4094 if (compute_vecs) { 4095 PetscInt lrows; 4096 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4097 if (coarse_mat_is) { 4098 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4099 } else { 4100 lrows = 0; 4101 } 4102 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4103 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4104 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4105 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4106 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4107 } 4108 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4109 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4110 4111 /* set defaults for coarse KSP and PC */ 4112 if (multilevel_allowed) { 4113 coarse_ksp_type = KSPRICHARDSON; 4114 coarse_pc_type = PCBDDC; 4115 } else { 4116 coarse_ksp_type = KSPPREONLY; 4117 coarse_pc_type = PCREDUNDANT; 4118 } 4119 4120 /* print some info if requested */ 4121 if (pcbddc->dbg_flag) { 4122 if (!multilevel_allowed) { 4123 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4124 if (multilevel_requested) { 4125 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4126 } else if (pcbddc->max_levels) { 4127 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4128 } 4129 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4130 } 4131 } 4132 4133 /* create the coarse KSP object only once with defaults */ 4134 if (coarse_mat_is) { 4135 MatReuse coarse_mat_reuse; 4136 PetscViewer dbg_viewer = NULL; 4137 if (pcbddc->dbg_flag) { 4138 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4139 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4140 } 4141 if (!pcbddc->coarse_ksp) { 4142 char prefix[256],str_level[16]; 4143 size_t len; 4144 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4145 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4146 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4147 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4148 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4149 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4150 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4151 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4152 /* prefix */ 4153 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4154 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4155 if (!pcbddc->current_level) { 4156 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4157 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4158 } else { 4159 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4160 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4161 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4162 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4163 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4164 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4165 } 4166 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4167 /* allow user customization */ 4168 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4169 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4170 } 4171 4172 /* get some info after set from options */ 4173 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4174 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4175 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4176 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4177 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4178 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4179 isbddc = PETSC_FALSE; 4180 } 4181 if (isredundant) { 4182 KSP inner_ksp; 4183 PC inner_pc; 4184 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4185 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4186 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4187 } 4188 4189 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4190 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4191 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4192 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4193 if (nisdofs) { 4194 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4195 for (i=0;i<nisdofs;i++) { 4196 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4197 } 4198 } 4199 if (nisneu) { 4200 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4201 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4202 } 4203 4204 /* assemble coarse matrix */ 4205 if (coarse_reuse) { 4206 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4207 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4208 coarse_mat_reuse = MAT_REUSE_MATRIX; 4209 } else { 4210 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4211 } 4212 if (isbddc || isnn) { 4213 if (pcbddc->coarsening_ratio > 1) { 4214 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4215 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4216 if (pcbddc->dbg_flag) { 4217 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4218 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4219 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4220 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4221 } 4222 } 4223 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4224 } else { 4225 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4226 coarse_mat = coarse_mat_is; 4227 } 4228 } else { 4229 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4230 } 4231 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4232 4233 /* propagate symmetry info to coarse matrix */ 4234 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4235 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4236 4237 /* set operators */ 4238 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4239 if (pcbddc->dbg_flag) { 4240 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4241 } 4242 } else { /* processes non partecipating to coarse solver (if any) */ 4243 coarse_mat = 0; 4244 } 4245 ierr = PetscFree(isarray);CHKERRQ(ierr); 4246 #if 0 4247 { 4248 PetscViewer viewer; 4249 char filename[256]; 4250 sprintf(filename,"coarse_mat.m"); 4251 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4252 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4253 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4254 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4255 } 4256 #endif 4257 4258 /* Compute coarse null space (special handling by BDDC only) */ 4259 if (pcbddc->NullSpace) { 4260 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4261 } 4262 4263 if (pcbddc->coarse_ksp) { 4264 Vec crhs,csol; 4265 PetscBool ispreonly; 4266 if (CoarseNullSpace) { 4267 if (isbddc) { 4268 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4269 } else { 4270 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4271 } 4272 } 4273 /* setup coarse ksp */ 4274 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4275 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4276 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4277 /* hack */ 4278 if (!csol) { 4279 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4280 } 4281 if (!crhs) { 4282 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4283 } 4284 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4285 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4286 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4287 KSP check_ksp; 4288 KSPType check_ksp_type; 4289 PC check_pc; 4290 Vec check_vec,coarse_vec; 4291 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4292 PetscInt its; 4293 PetscBool compute_eigs; 4294 PetscReal *eigs_r,*eigs_c; 4295 PetscInt neigs; 4296 const char *prefix; 4297 4298 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4299 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4300 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4301 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4302 if (ispreonly) { 4303 check_ksp_type = KSPPREONLY; 4304 compute_eigs = PETSC_FALSE; 4305 } else { 4306 check_ksp_type = KSPGMRES; 4307 compute_eigs = PETSC_TRUE; 4308 } 4309 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4310 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4311 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4312 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4313 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4314 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4315 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4316 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4317 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4318 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4319 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4320 /* create random vec */ 4321 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4322 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4323 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4324 if (CoarseNullSpace) { 4325 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4326 } 4327 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4328 /* solve coarse problem */ 4329 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4330 if (CoarseNullSpace) { 4331 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4332 } 4333 /* set eigenvalue estimation if preonly has not been requested */ 4334 if (compute_eigs) { 4335 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4336 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4337 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4338 lambda_max = eigs_r[neigs-1]; 4339 lambda_min = eigs_r[0]; 4340 if (pcbddc->use_coarse_estimates) { 4341 if (lambda_max>lambda_min) { 4342 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4343 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4344 } 4345 } 4346 } 4347 4348 /* check coarse problem residual error */ 4349 if (pcbddc->dbg_flag) { 4350 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4351 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4352 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4353 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4354 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4355 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4356 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4357 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4358 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4359 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4360 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4361 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4362 if (compute_eigs) { 4363 PetscReal lambda_max_s,lambda_min_s; 4364 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4365 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4366 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4367 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4368 for (i=0;i<neigs;i++) { 4369 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4370 } 4371 } 4372 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4373 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4374 } 4375 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4376 if (compute_eigs) { 4377 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4378 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4379 } 4380 } 4381 } 4382 /* print additional info */ 4383 if (pcbddc->dbg_flag) { 4384 /* waits until all processes reaches this point */ 4385 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4386 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4387 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4388 } 4389 4390 /* free memory */ 4391 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4392 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4393 PetscFunctionReturn(0); 4394 } 4395 4396 #undef __FUNCT__ 4397 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4398 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4399 { 4400 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4401 PC_IS* pcis = (PC_IS*)pc->data; 4402 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4403 PetscInt i,coarse_size; 4404 PetscInt *local_primal_indices; 4405 PetscErrorCode ierr; 4406 4407 PetscFunctionBegin; 4408 /* Compute global number of coarse dofs */ 4409 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4410 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4411 } 4412 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4413 4414 /* check numbering */ 4415 if (pcbddc->dbg_flag) { 4416 PetscScalar coarsesum,*array; 4417 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4418 4419 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4420 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4421 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4422 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4423 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4424 for (i=0;i<pcbddc->local_primal_size;i++) { 4425 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4426 } 4427 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4428 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4429 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4430 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4431 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4432 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4433 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4434 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4435 for (i=0;i<pcis->n;i++) { 4436 if (array[i] == 1.0) { 4437 set_error = PETSC_TRUE; 4438 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4439 } 4440 } 4441 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4442 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4443 for (i=0;i<pcis->n;i++) { 4444 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4445 } 4446 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4447 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4448 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4449 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4450 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4451 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4452 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4453 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4454 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4455 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4456 for (i=0;i<pcbddc->local_primal_size;i++) { 4457 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4458 } 4459 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4460 } 4461 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4462 if (set_error_reduced) { 4463 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4464 } 4465 } 4466 /* get back data */ 4467 *coarse_size_n = coarse_size; 4468 *local_primal_indices_n = local_primal_indices; 4469 PetscFunctionReturn(0); 4470 } 4471 4472 #undef __FUNCT__ 4473 #define __FUNCT__ "PCBDDCGlobalToLocal" 4474 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4475 { 4476 IS localis_t; 4477 PetscInt i,lsize,*idxs,n; 4478 PetscScalar *vals; 4479 PetscErrorCode ierr; 4480 4481 PetscFunctionBegin; 4482 /* get indices in local ordering exploiting local to global map */ 4483 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4484 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4485 for (i=0;i<lsize;i++) vals[i] = 1.0; 4486 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4487 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4488 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4489 if (idxs) { /* multilevel guard */ 4490 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4491 } 4492 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4493 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4494 ierr = PetscFree(vals);CHKERRQ(ierr); 4495 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4496 /* now compute set in local ordering */ 4497 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4498 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4499 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4500 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4501 for (i=0,lsize=0;i<n;i++) { 4502 if (PetscRealPart(vals[i]) > 0.5) { 4503 lsize++; 4504 } 4505 } 4506 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4507 for (i=0,lsize=0;i<n;i++) { 4508 if (PetscRealPart(vals[i]) > 0.5) { 4509 idxs[lsize++] = i; 4510 } 4511 } 4512 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4513 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4514 *localis = localis_t; 4515 PetscFunctionReturn(0); 4516 } 4517 4518 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4519 #undef __FUNCT__ 4520 #define __FUNCT__ "PCBDDCMatMult_Private" 4521 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4522 { 4523 PCBDDCChange_ctx change_ctx; 4524 PetscErrorCode ierr; 4525 4526 PetscFunctionBegin; 4527 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4528 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4529 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4530 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4531 PetscFunctionReturn(0); 4532 } 4533 4534 #undef __FUNCT__ 4535 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4536 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4537 { 4538 PCBDDCChange_ctx change_ctx; 4539 PetscErrorCode ierr; 4540 4541 PetscFunctionBegin; 4542 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4543 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4544 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4545 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4546 PetscFunctionReturn(0); 4547 } 4548 4549 #undef __FUNCT__ 4550 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4551 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4552 { 4553 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4554 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4555 PetscInt *used_xadj,*used_adjncy; 4556 PetscBool free_used_adj; 4557 PetscErrorCode ierr; 4558 4559 PetscFunctionBegin; 4560 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4561 free_used_adj = PETSC_FALSE; 4562 if (pcbddc->sub_schurs_layers == -1) { 4563 used_xadj = NULL; 4564 used_adjncy = NULL; 4565 } else { 4566 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4567 used_xadj = pcbddc->mat_graph->xadj; 4568 used_adjncy = pcbddc->mat_graph->adjncy; 4569 } else if (pcbddc->computed_rowadj) { 4570 used_xadj = pcbddc->mat_graph->xadj; 4571 used_adjncy = pcbddc->mat_graph->adjncy; 4572 } else { 4573 Mat mat_adj; 4574 PetscBool flg_row=PETSC_TRUE; 4575 const PetscInt *xadj,*adjncy; 4576 PetscInt nvtxs; 4577 4578 ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 4579 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4580 if (!flg_row) { 4581 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 4582 } 4583 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4584 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4585 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4586 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4587 if (!flg_row) { 4588 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 4589 } 4590 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 4591 free_used_adj = PETSC_TRUE; 4592 } 4593 } 4594 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4595 4596 /* free adjacency */ 4597 if (free_used_adj) { 4598 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4599 } 4600 PetscFunctionReturn(0); 4601 } 4602 4603 #undef __FUNCT__ 4604 #define __FUNCT__ "PCBDDCInitSubSchurs" 4605 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4606 { 4607 PC_IS *pcis=(PC_IS*)pc->data; 4608 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4609 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4610 PCBDDCGraph graph; 4611 Mat S_j; 4612 PetscErrorCode ierr; 4613 4614 PetscFunctionBegin; 4615 /* attach interface graph for determining subsets */ 4616 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4617 IS verticesIS; 4618 4619 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4620 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4621 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4622 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4623 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4624 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4625 /* 4626 if (pcbddc->dbg_flag) { 4627 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4628 } 4629 */ 4630 } else { 4631 graph = pcbddc->mat_graph; 4632 } 4633 4634 /* Create Schur complement matrix */ 4635 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4636 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4637 4638 /* sub_schurs init */ 4639 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr); 4640 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4641 /* free graph struct */ 4642 if (pcbddc->sub_schurs_rebuild) { 4643 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4644 } 4645 PetscFunctionReturn(0); 4646 } 4647