1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs,maxneigs_r,cumsubs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscBool symmetric; 26 PetscErrorCode ierr; 27 28 PetscFunctionBegin; 29 if (!sub_schurs->use_mumps) { 30 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 31 } 32 ierr = MatIsSymmetric(sub_schurs->A,0.0,&symmetric);CHKERRQ(ierr); 33 if (!symmetric) { 34 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented"); 35 } 36 37 /* max size of subsets */ 38 mss = 0; 39 for (i=0;i<sub_schurs->n_subs;i++) { 40 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 41 PetscInt subset_size; 42 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 43 mss = PetscMax(mss,subset_size); 44 } 45 } 46 47 /* min/max and threshold */ 48 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 49 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 1; 50 nmax = PetscMax(nmin,nmax); 51 if (pcbddc->adaptive_threshold > 1.0) { 52 thresh = 1.0/pcbddc->adaptive_threshold; 53 } else { 54 thresh = 1.0; 55 } 56 57 /* allocate lapack workspace */ 58 cum = cum2 = 0; 59 maxneigs = 0; 60 for (i=0;i<sub_schurs->n_subs;i++) { 61 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 62 PetscInt n,subset_size; 63 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 64 n = PetscMin(subset_size,nmax); 65 n = PetscMax(n,nmin); 66 cum += subset_size*n; 67 cum2 += n; 68 maxneigs = PetscMax(maxneigs,n); 69 } 70 } 71 72 if (mss) { 73 if (symmetric) { 74 PetscBLASInt B_itype = 1; 75 PetscBLASInt B_N = mss; 76 PetscScalar zero = 0.0; 77 PetscScalar eps = 0.0; /* dlamch? */ 78 79 B_lwork = -1; 80 S = NULL; 81 St = NULL; 82 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 83 #if defined(PETSC_USE_COMPLEX) 84 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 85 #else 86 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 87 #endif 88 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 89 ierr = PetscFPTrapPop();CHKERRQ(ierr); 90 } else { 91 /* TODO */ 92 } 93 } else { 94 lwork = 0; 95 } 96 97 nv = 0; 98 if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */ 99 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 100 } 101 ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr); 102 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 103 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 104 #if defined(PETSC_USE_COMPLEX) 105 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 106 #endif 107 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 108 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 109 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 110 nv+cum,&pcbddc->adaptive_constraints_idxs, 111 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 112 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 113 114 maxneigs = 0; 115 cum = cum2 = cumarray = 0; 116 if (sub_schurs->is_Ej_com) { 117 const PetscInt *idxs; 118 119 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 120 for (cum=0;cum<nv;cum++) { 121 pcbddc->adaptive_constraints_n[cum] = 1; 122 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 123 pcbddc->adaptive_constraints_ptrs[cum] = cum; 124 pcbddc->adaptive_constraints_data[cum] = 1.0; 125 } 126 cum2 = cum; 127 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 128 } 129 130 if (mss) { /* multilevel */ 131 if (pcbddc->use_deluxe_scaling) { 132 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 133 } else { 134 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 135 } 136 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 137 } 138 139 cumsubs = 0; 140 for (i=0;i<sub_schurs->n_subs;i++) { 141 PetscInt j,subset_size; 142 143 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 144 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 145 const PetscInt *idxs; 146 PetscScalar one = 1.0,zero=0.0; 147 PetscBLASInt B_N; 148 149 /* S should be copied since we need it for deluxe scaling */ 150 if (symmetric) { 151 PetscInt j; 152 for (j=0;j<subset_size;j++) { 153 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 154 } 155 for (j=0;j<subset_size;j++) { 156 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 157 } 158 } else { 159 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 160 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 161 } 162 /* is always this the right matrix? */ 163 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 164 165 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 166 /* St = Starray+cumarray; */ 167 168 if (symmetric) { 169 PetscBLASInt B_itype = 1; 170 PetscBLASInt B_IL = 1, B_IU; 171 PetscScalar eps = -1.0; /* dlamch? */ 172 173 /* ask for eigenvalues lower than thresh */ 174 PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs); 175 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 176 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 177 #if defined(PETSC_USE_COMPLEX) 178 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 179 #else 180 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 181 #endif 182 ierr = PetscFPTrapPop();CHKERRQ(ierr); 183 if (B_ierr) { 184 if (B_ierr < 0 ) { 185 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 186 } else if (B_ierr <= B_N) { 187 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 188 } else { 189 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d principal minor not posdef",(int)B_ierr-B_N-1); 190 } 191 } 192 193 if (B_neigs > nmax) { 194 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, more than required %d.\n",PetscGlobalRank,B_neigs,nmax); 195 B_neigs = nmax; 196 } 197 198 if (B_neigs < nmin) { 199 PetscBLASInt B_neigs2; 200 201 B_IL = B_neigs + 1; 202 ierr = PetscBLASIntCast(nmin,&B_IU);CHKERRQ(ierr); 203 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, less than required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU); 204 if (symmetric) { 205 PetscInt j; 206 for (j=0;j<subset_size;j++) { 207 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 208 } 209 for (j=0;j<subset_size;j++) { 210 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 211 } 212 } else { 213 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 214 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 215 } 216 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 217 #if defined(PETSC_USE_COMPLEX) 218 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 219 #else 220 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 221 #endif 222 ierr = PetscFPTrapPop();CHKERRQ(ierr); 223 B_neigs += B_neigs2; 224 } 225 if (B_ierr) { 226 if (B_ierr < 0 ) { 227 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 228 } else if (B_ierr <= B_N) { 229 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 230 } else { 231 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d principal minor not posdef",(int)B_ierr-B_N-1); 232 } 233 } 234 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Got %d eigs\n",PetscGlobalRank,B_neigs); 235 for (j=0;j<B_neigs;j++) { 236 if (eigs[j] == 0.0) { 237 PetscPrintf(PETSC_COMM_SELF,"[%d] Inf\n",PetscGlobalRank); 238 } else { 239 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.6e\n",PetscGlobalRank,1.0/eigs[j]); 240 } 241 } 242 } else { 243 /* TODO */ 244 } 245 maxneigs = PetscMax(B_neigs,maxneigs); 246 pcbddc->adaptive_constraints_n[cumsubs+nv] = B_neigs; 247 cumsubs++; 248 249 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 250 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N)); 251 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 252 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 253 for (j=0;j<B_neigs;j++) { 254 #if 0 255 { 256 PetscBLASInt Blas_N,Blas_one = 1.0; 257 PetscScalar norm; 258 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 259 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 260 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 261 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 262 } else { 263 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 264 } 265 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 266 } 267 #endif 268 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 269 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 270 cum2 += subset_size; 271 } 272 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 273 } 274 /* shift for next computation */ 275 cumarray += subset_size*subset_size; 276 } 277 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 278 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 279 280 if (mss) { 281 if (pcbddc->use_deluxe_scaling) { 282 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 283 } else { 284 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 285 } 286 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 287 } 288 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 289 #if defined(PETSC_USE_COMPLEX) 290 ierr = PetscFree(rwork);CHKERRQ(ierr); 291 #endif 292 if (pcbddc->dbg_flag) { 293 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 294 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 295 } 296 PetscFunctionReturn(0); 297 } 298 299 #undef __FUNCT__ 300 #define __FUNCT__ "PCBDDCSetUpSolvers" 301 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 302 { 303 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 304 PetscScalar *coarse_submat_vals; 305 PetscErrorCode ierr; 306 307 PetscFunctionBegin; 308 /* Setup local scatters R_to_B and (optionally) R_to_D */ 309 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 310 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 311 312 /* Setup local neumann solver ksp_R */ 313 /* PCBDDCSetUpLocalScatters should be called first! */ 314 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 315 316 /* Change global null space passed in by the user if change of basis has been requested */ 317 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 318 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 319 } 320 321 /* 322 Setup local correction and local part of coarse basis. 323 Gives back the dense local part of the coarse matrix in column major ordering 324 */ 325 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 326 327 /* Compute total number of coarse nodes and setup coarse solver */ 328 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 329 330 /* free */ 331 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 332 PetscFunctionReturn(0); 333 } 334 335 #undef __FUNCT__ 336 #define __FUNCT__ "PCBDDCResetCustomization" 337 PetscErrorCode PCBDDCResetCustomization(PC pc) 338 { 339 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 340 PetscErrorCode ierr; 341 342 PetscFunctionBegin; 343 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 344 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 345 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 346 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 347 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 348 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 349 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 350 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 351 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 352 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 353 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 354 PetscFunctionReturn(0); 355 } 356 357 #undef __FUNCT__ 358 #define __FUNCT__ "PCBDDCResetTopography" 359 PetscErrorCode PCBDDCResetTopography(PC pc) 360 { 361 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 362 PetscErrorCode ierr; 363 364 PetscFunctionBegin; 365 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 366 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 367 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 368 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 369 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 370 PetscFunctionReturn(0); 371 } 372 373 #undef __FUNCT__ 374 #define __FUNCT__ "PCBDDCResetSolvers" 375 PetscErrorCode PCBDDCResetSolvers(PC pc) 376 { 377 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 378 PetscScalar *array; 379 PetscErrorCode ierr; 380 381 PetscFunctionBegin; 382 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 383 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 384 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 385 ierr = PetscFree(array);CHKERRQ(ierr); 386 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 387 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 388 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 389 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 390 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 391 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 392 if (pcbddc->local_auxmat2) { 393 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 394 ierr = PetscFree(array);CHKERRQ(ierr); 395 } 396 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 397 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 398 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 399 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 400 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 401 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 402 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 403 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 404 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 405 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 406 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 407 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 408 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 409 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 410 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 411 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 412 PetscFunctionReturn(0); 413 } 414 415 #undef __FUNCT__ 416 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 417 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 418 { 419 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 420 PC_IS *pcis = (PC_IS*)pc->data; 421 VecType impVecType; 422 PetscInt n_constraints,n_R,old_size; 423 PetscErrorCode ierr; 424 425 PetscFunctionBegin; 426 if (!pcbddc->ConstraintMatrix) { 427 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 428 } 429 /* get sizes */ 430 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 431 n_R = pcis->n-pcbddc->n_actual_vertices; 432 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 433 /* local work vectors (try to avoid unneeded work)*/ 434 /* R nodes */ 435 old_size = -1; 436 if (pcbddc->vec1_R) { 437 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 438 } 439 if (n_R != old_size) { 440 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 441 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 442 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 443 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 444 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 445 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 446 } 447 /* local primal dofs */ 448 old_size = -1; 449 if (pcbddc->vec1_P) { 450 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 451 } 452 if (pcbddc->local_primal_size != old_size) { 453 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 454 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 455 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 456 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 457 } 458 /* local explicit constraints */ 459 old_size = -1; 460 if (pcbddc->vec1_C) { 461 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 462 } 463 if (n_constraints && n_constraints != old_size) { 464 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 465 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 466 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 467 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 468 } 469 PetscFunctionReturn(0); 470 } 471 472 #undef __FUNCT__ 473 #define __FUNCT__ "PCBDDCSetUpCorrection" 474 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 475 { 476 PetscErrorCode ierr; 477 /* pointers to pcis and pcbddc */ 478 PC_IS* pcis = (PC_IS*)pc->data; 479 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 480 /* submatrices of local problem */ 481 Mat A_RV,A_VR,A_VV; 482 /* submatrices of local coarse problem */ 483 Mat S_VV,S_CV,S_VC,S_CC; 484 /* working matrices */ 485 Mat C_CR; 486 /* additional working stuff */ 487 PC pc_R; 488 Mat F; 489 PetscBool isLU,isCHOL,isILU; 490 491 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 492 PetscScalar *work; 493 PetscInt *idx_V_B; 494 PetscInt n,n_vertices,n_constraints; 495 PetscInt i,n_R,n_D,n_B; 496 PetscBool unsymmetric_check; 497 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 498 MatType impMatType; 499 /* some shortcuts to scalars */ 500 PetscScalar one=1.0,m_one=-1.0; 501 502 PetscFunctionBegin; 503 /* get number of vertices (corners plus constraints with change of basis) 504 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 505 n_vertices = pcbddc->n_actual_vertices; 506 n_constraints = pcbddc->local_primal_size-n_vertices; 507 /* Set Non-overlapping dimensions */ 508 n_B = pcis->n_B; n_D = pcis->n - n_B; 509 n_R = pcis->n-n_vertices; 510 511 /* Set types for local objects needed by BDDC precondtioner */ 512 impMatType = MATSEQDENSE; 513 514 /* vertices in boundary numbering */ 515 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 516 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 517 if (i != n_vertices) { 518 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 519 } 520 521 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 522 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 523 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 524 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 525 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 526 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 527 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 528 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 529 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 530 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 531 532 unsymmetric_check = PETSC_FALSE; 533 /* allocate workspace */ 534 n = 0; 535 if (n_constraints) { 536 n += n_R*n_constraints; 537 } 538 if (n_vertices) { 539 n = PetscMax(2*n_R*n_vertices,n); 540 } 541 if (!pcbddc->issym) { 542 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 543 unsymmetric_check = PETSC_TRUE; 544 } 545 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 546 547 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 548 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 549 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 550 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 551 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 552 if (isLU || isILU || isCHOL) { 553 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 554 } else { 555 F = NULL; 556 } 557 558 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 559 if (n_constraints) { 560 Mat M1,M2,M3; 561 IS is_aux; 562 /* see if we can save some allocations */ 563 if (pcbddc->local_auxmat2) { 564 PetscInt on_R,on_constraints; 565 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 566 if (on_R != n_R || on_constraints != n_constraints) { 567 PetscScalar *marray; 568 569 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 570 ierr = PetscFree(marray);CHKERRQ(ierr); 571 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 572 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 573 } 574 } 575 /* auxiliary matrices */ 576 if (!pcbddc->local_auxmat2) { 577 PetscScalar *marray; 578 579 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 580 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 581 marray += n_R*n_constraints; 582 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 583 } 584 585 /* Extract constraints on R nodes: C_{CR} */ 586 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 587 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 588 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 589 590 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 591 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 592 for (i=0;i<n_constraints;i++) { 593 const PetscScalar *row_cmat_values; 594 const PetscInt *row_cmat_indices; 595 PetscInt size_of_constraint,j; 596 597 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 598 for (j=0;j<size_of_constraint;j++) { 599 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 600 } 601 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 602 } 603 if (F) { 604 Mat B; 605 606 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 607 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 608 ierr = MatDestroy(&B);CHKERRQ(ierr); 609 } else { 610 PetscScalar *xarray; 611 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 612 for (i=0;i<n_constraints;i++) { 613 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 614 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 615 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 616 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 617 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 618 } 619 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 620 } 621 622 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 623 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 624 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 625 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 626 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 627 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 628 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 629 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 630 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 631 ierr = MatDestroy(&M2);CHKERRQ(ierr); 632 ierr = MatDestroy(&M3);CHKERRQ(ierr); 633 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 634 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 635 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 636 ierr = MatDestroy(&M1);CHKERRQ(ierr); 637 } 638 639 /* Get submatrices from subdomain matrix */ 640 if (n_vertices) { 641 Mat newmat; 642 IS is_aux; 643 PetscInt ibs,mbs; 644 PetscBool issbaij; 645 646 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 647 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 648 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 649 if (ibs != mbs) { /* need to convert to SEQAIJ */ 650 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 651 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 652 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 653 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 654 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 655 } else { 656 /* this is safe */ 657 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 658 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 659 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 660 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 661 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 662 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 663 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 664 } else { 665 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 666 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 667 } 668 } 669 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 670 } 671 672 /* Matrix of coarse basis functions (local) */ 673 if (pcbddc->coarse_phi_B) { 674 PetscInt on_B,on_primal,on_D=n_D; 675 if (pcbddc->coarse_phi_D) { 676 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 677 } 678 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 679 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 680 PetscScalar *marray; 681 682 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 683 ierr = PetscFree(marray);CHKERRQ(ierr); 684 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 685 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 686 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 687 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 688 } 689 } 690 691 if (!pcbddc->coarse_phi_B) { 692 PetscScalar *marray; 693 694 n = n_B*pcbddc->local_primal_size; 695 if (pcbddc->switch_static || pcbddc->dbg_flag) { 696 n += n_D*pcbddc->local_primal_size; 697 } 698 if (!pcbddc->issym) { 699 n *= 2; 700 } 701 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 702 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 703 n = n_B*pcbddc->local_primal_size; 704 if (pcbddc->switch_static || pcbddc->dbg_flag) { 705 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 706 n += n_D*pcbddc->local_primal_size; 707 } 708 if (!pcbddc->issym) { 709 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 710 if (pcbddc->switch_static) { 711 n = n_B*pcbddc->local_primal_size; 712 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 713 } 714 } else { 715 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 716 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 717 if (pcbddc->switch_static) { 718 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 719 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 720 } 721 } 722 } 723 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 724 /* vertices */ 725 if (n_vertices) { 726 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 727 PetscScalar *x,*y; 728 PetscBLASInt B_N,B_one = 1; 729 730 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 731 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 732 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 733 if (F) { 734 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 735 } else { 736 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 737 for (i=0;i<n_vertices;i++) { 738 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 739 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 740 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 741 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 742 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 743 } 744 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 745 } 746 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 747 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 748 if (n_constraints) { 749 Mat B; 750 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 751 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 752 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 753 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 754 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 755 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 756 ierr = MatDestroy(&B);CHKERRQ(ierr); 757 } 758 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 759 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 760 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 761 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 762 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 763 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 764 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 765 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 766 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 767 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 768 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 769 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 770 /* coarse basis functions */ 771 for (i=0;i<n_vertices;i++) { 772 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 773 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 774 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 775 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 776 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 777 y[n_B*i+idx_V_B[i]] = 1.0; 778 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 779 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 780 781 if (pcbddc->switch_static || pcbddc->dbg_flag) { 782 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 783 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 784 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 785 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 786 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 787 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 788 } 789 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 790 } 791 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 792 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 793 } 794 795 if (n_constraints) { 796 Mat B; 797 798 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 799 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 800 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 801 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 802 if (n_vertices) { 803 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 804 } 805 ierr = MatDestroy(&B);CHKERRQ(ierr); 806 /* coarse basis functions */ 807 for (i=0;i<n_constraints;i++) { 808 PetscScalar *y; 809 810 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 811 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 812 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 813 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 814 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 815 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 816 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 817 818 if (pcbddc->switch_static || pcbddc->dbg_flag) { 819 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 820 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 821 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 822 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 823 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 824 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 825 } 826 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 827 } 828 } 829 830 /* compute other basis functions for non-symmetric problems */ 831 if (!pcbddc->issym) { 832 Mat B,X; 833 834 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 835 836 if (n_constraints) { 837 Mat S_VCT,S_CCT,B_C; 838 839 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 840 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 841 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 842 if (n_vertices) { 843 Mat B_V; 844 845 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 846 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 847 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 848 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 849 } 850 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 851 } 852 if (n_vertices) { 853 Mat A_VRT; 854 PetscBLASInt B_N,B_one = 1; 855 856 if (!n_constraints) { /* if there are no constraints, reset work */ 857 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 858 } 859 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 860 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 861 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 862 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 863 } 864 865 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 866 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 867 for (i=0;i<pcbddc->local_primal_size;i++) { 868 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 869 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 870 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 871 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 872 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 873 } 874 } else { 875 for (i=0;i<pcbddc->local_primal_size;i++) { 876 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 877 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 878 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 879 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 880 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 881 } 882 } 883 ierr = MatDestroy(&B);CHKERRQ(ierr); 884 /* coarse basis functions */ 885 for (i=0;i<pcbddc->local_primal_size;i++) { 886 PetscScalar *y; 887 888 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 889 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 890 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 891 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 892 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 893 if (i<n_vertices) { 894 y[n_B*i+idx_V_B[i]] = 1.0; 895 } 896 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 897 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 898 899 if (pcbddc->switch_static || pcbddc->dbg_flag) { 900 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 901 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 902 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 903 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 904 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 905 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 906 } 907 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 908 } 909 ierr = MatDestroy(&X);CHKERRQ(ierr); 910 } 911 912 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 913 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 914 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 915 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 916 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 917 /* Checking coarse_sub_mat and coarse basis functios */ 918 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 919 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 920 if (pcbddc->dbg_flag) { 921 Mat coarse_sub_mat; 922 Mat AUXMAT,TM1,TM2,TM3,TM4; 923 Mat coarse_phi_D,coarse_phi_B; 924 Mat coarse_psi_D,coarse_psi_B; 925 Mat A_II,A_BB,A_IB,A_BI; 926 MatType checkmattype=MATSEQAIJ; 927 PetscReal real_value; 928 929 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 930 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 931 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 932 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 933 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 934 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 935 if (unsymmetric_check) { 936 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 937 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 938 } 939 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 940 941 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 942 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 943 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 944 if (unsymmetric_check) { 945 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 946 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 947 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 948 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 949 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 950 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 951 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 952 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 953 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 954 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 955 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 956 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 957 } else { 958 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 959 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 960 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 961 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 962 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 963 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 964 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 965 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 966 } 967 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 968 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 969 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 970 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 971 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 972 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 973 974 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 975 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 976 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 977 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 978 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 979 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 980 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 981 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 982 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 983 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 984 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 985 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 986 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 987 if (unsymmetric_check) { 988 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 989 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 990 } 991 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 992 } 993 994 /* free memory */ 995 ierr = PetscFree(work);CHKERRQ(ierr); 996 if (n_vertices) { 997 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 998 } 999 if (n_constraints) { 1000 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1001 } 1002 /* get back data */ 1003 *coarse_submat_vals_n = coarse_submat_vals; 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNCT__ 1008 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1009 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B) 1010 { 1011 Mat *work_mat; 1012 IS isrow_s,iscol_s; 1013 PetscBool rsorted,csorted; 1014 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1015 PetscErrorCode ierr; 1016 1017 PetscFunctionBegin; 1018 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1019 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1020 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1021 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1022 1023 if (!rsorted) { 1024 const PetscInt *idxs; 1025 PetscInt *idxs_sorted,i; 1026 1027 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1028 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1029 for (i=0;i<rsize;i++) { 1030 idxs_perm_r[i] = i; 1031 } 1032 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1033 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1034 for (i=0;i<rsize;i++) { 1035 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1036 } 1037 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1038 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1039 } else { 1040 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1041 isrow_s = isrow; 1042 } 1043 1044 if (!csorted) { 1045 if (isrow == iscol) { 1046 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1047 iscol_s = isrow_s; 1048 } else { 1049 const PetscInt *idxs; 1050 PetscInt *idxs_sorted,i; 1051 1052 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1053 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1054 for (i=0;i<csize;i++) { 1055 idxs_perm_c[i] = i; 1056 } 1057 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1058 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1059 for (i=0;i<csize;i++) { 1060 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1061 } 1062 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1063 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1064 } 1065 } else { 1066 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1067 iscol_s = iscol; 1068 } 1069 1070 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr); 1071 1072 if (!rsorted || !csorted) { 1073 Mat new_mat; 1074 IS is_perm_r,is_perm_c; 1075 1076 if (!rsorted) { 1077 PetscInt *idxs_r,i; 1078 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1079 for (i=0;i<rsize;i++) { 1080 idxs_r[idxs_perm_r[i]] = i; 1081 } 1082 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1083 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1084 } else { 1085 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1086 } 1087 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1088 1089 if (!csorted) { 1090 if (isrow_s == iscol_s) { 1091 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1092 is_perm_c = is_perm_r; 1093 } else { 1094 PetscInt *idxs_c,i; 1095 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1096 for (i=0;i<csize;i++) { 1097 idxs_c[idxs_perm_c[i]] = i; 1098 } 1099 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1100 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1101 } 1102 } else { 1103 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1104 } 1105 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1106 1107 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1108 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1109 work_mat[0] = new_mat; 1110 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1111 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1112 } 1113 1114 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1115 *B = work_mat[0]; 1116 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1117 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1118 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1124 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1125 { 1126 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1127 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1128 Mat new_mat; 1129 IS is_local,is_global; 1130 PetscInt local_size; 1131 PetscBool isseqaij; 1132 PetscErrorCode ierr; 1133 1134 PetscFunctionBegin; 1135 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1136 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1137 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1138 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1139 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1140 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr); 1141 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1142 1143 /* check */ 1144 if (pcbddc->dbg_flag) { 1145 Vec x,x_change; 1146 PetscReal error; 1147 1148 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1149 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1150 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1151 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1152 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1153 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1154 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1155 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1156 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1157 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1158 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1159 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1160 ierr = VecDestroy(&x);CHKERRQ(ierr); 1161 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1162 } 1163 1164 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1165 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1166 if (isseqaij) { 1167 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1168 } else { 1169 Mat work_mat; 1170 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1171 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1172 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1173 } 1174 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1175 /* 1176 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1177 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1178 */ 1179 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1185 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1186 { 1187 PC_IS* pcis = (PC_IS*)(pc->data); 1188 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1189 IS is_aux1,is_aux2; 1190 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1191 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1192 PetscInt vbs,bs; 1193 PetscBT bitmask; 1194 PetscErrorCode ierr; 1195 1196 PetscFunctionBegin; 1197 /* 1198 No need to setup local scatters if 1199 - primal space is unchanged 1200 AND 1201 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1202 AND 1203 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1204 */ 1205 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1206 PetscFunctionReturn(0); 1207 } 1208 /* destroy old objects */ 1209 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1210 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1211 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1212 /* Set Non-overlapping dimensions */ 1213 n_B = pcis->n_B; n_D = pcis->n - n_B; 1214 n_vertices = pcbddc->n_actual_vertices; 1215 /* create auxiliary bitmask */ 1216 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1217 for (i=0;i<n_vertices;i++) { 1218 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1219 } 1220 1221 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1222 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1223 for (i=0, n_R=0; i<pcis->n; i++) { 1224 if (!PetscBTLookup(bitmask,i)) { 1225 idx_R_local[n_R] = i; 1226 n_R++; 1227 } 1228 } 1229 1230 /* Block code */ 1231 vbs = 1; 1232 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1233 if (bs>1 && !(n_vertices%bs)) { 1234 PetscBool is_blocked = PETSC_TRUE; 1235 PetscInt *vary; 1236 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1237 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1238 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1239 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1240 for (i=0; i<n_vertices; i++) { 1241 if (vary[i]!=0 && vary[i]!=bs) { 1242 is_blocked = PETSC_FALSE; 1243 break; 1244 } 1245 } 1246 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1247 vbs = bs; 1248 for (i=0;i<n_R/vbs;i++) { 1249 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1250 } 1251 } 1252 ierr = PetscFree(vary);CHKERRQ(ierr); 1253 } 1254 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1255 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1256 1257 /* print some info if requested */ 1258 if (pcbddc->dbg_flag) { 1259 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1260 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1261 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1262 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1263 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1264 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1266 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1267 } 1268 1269 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1270 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1271 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1272 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1273 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1274 for (i=0; i<n_D; i++) { 1275 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1276 } 1277 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1278 for (i=0, j=0; i<n_R; i++) { 1279 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1280 aux_array1[j++] = i; 1281 } 1282 } 1283 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1284 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1285 for (i=0, j=0; i<n_B; i++) { 1286 if (!PetscBTLookup(bitmask,is_indices[i])) { 1287 aux_array2[j++] = i; 1288 } 1289 } 1290 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1291 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1292 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1293 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1294 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1295 1296 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1297 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1298 for (i=0, j=0; i<n_R; i++) { 1299 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1300 aux_array1[j++] = i; 1301 } 1302 } 1303 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1304 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1305 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1306 } 1307 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1308 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1309 PetscFunctionReturn(0); 1310 } 1311 1312 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1315 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1316 { 1317 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1318 PC_IS *pcis = (PC_IS*)pc->data; 1319 PC pc_temp; 1320 Mat A_RR; 1321 MatReuse reuse; 1322 PetscScalar m_one = -1.0; 1323 PetscReal value; 1324 PetscInt n_D,n_R,ibs,mbs; 1325 PetscBool use_exact,use_exact_reduced,issbaij; 1326 PetscErrorCode ierr; 1327 /* prefixes stuff */ 1328 char dir_prefix[256],neu_prefix[256],str_level[16]; 1329 size_t len; 1330 1331 PetscFunctionBegin; 1332 1333 /* compute prefixes */ 1334 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1335 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1336 if (!pcbddc->current_level) { 1337 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1338 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1339 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1340 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1341 } else { 1342 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1343 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1344 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1345 len -= 15; /* remove "pc_bddc_coarse_" */ 1346 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1347 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1348 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1349 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1350 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1351 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1352 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1353 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1354 } 1355 1356 /* DIRICHLET PROBLEM */ 1357 if (dirichlet) { 1358 /* Matrix for Dirichlet problem is pcis->A_II */ 1359 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1360 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1361 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1362 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1363 /* default */ 1364 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1365 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1366 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1367 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1368 if (issbaij) { 1369 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1370 } else { 1371 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1372 } 1373 /* Allow user's customization */ 1374 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1375 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1376 } 1377 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1378 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1379 if (!n_D) { 1380 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1381 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1382 } 1383 /* Set Up KSP for Dirichlet problem of BDDC */ 1384 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1385 /* set ksp_D into pcis data */ 1386 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1387 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1388 pcis->ksp_D = pcbddc->ksp_D; 1389 } 1390 1391 /* NEUMANN PROBLEM */ 1392 A_RR = 0; 1393 if (neumann) { 1394 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1395 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1396 if (pcbddc->ksp_R) { /* already created ksp */ 1397 PetscInt nn_R; 1398 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1399 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1400 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1401 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1402 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1403 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1404 reuse = MAT_INITIAL_MATRIX; 1405 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1406 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1407 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1408 reuse = MAT_INITIAL_MATRIX; 1409 } else { /* safe to reuse the matrix */ 1410 reuse = MAT_REUSE_MATRIX; 1411 } 1412 } 1413 /* last check */ 1414 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1415 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1416 reuse = MAT_INITIAL_MATRIX; 1417 } 1418 } else { /* first time, so we need to create the matrix */ 1419 reuse = MAT_INITIAL_MATRIX; 1420 } 1421 /* extract A_RR */ 1422 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1423 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1424 if (ibs != mbs) { 1425 Mat newmat; 1426 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1427 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1428 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1429 } else { 1430 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1431 } 1432 if (!pcbddc->ksp_R) { /* create object if not present */ 1433 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1434 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1435 /* default */ 1436 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1437 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1438 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1439 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1440 if (issbaij) { 1441 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1442 } else { 1443 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1444 } 1445 /* Allow user's customization */ 1446 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1447 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1448 } 1449 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1450 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1451 if (!n_R) { 1452 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1453 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1454 } 1455 /* Set Up KSP for Neumann problem of BDDC */ 1456 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1457 } 1458 1459 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1460 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1461 if (pcbddc->dbg_flag) { 1462 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1463 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1464 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1465 } 1466 if (dirichlet) { /* Dirichlet */ 1467 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1468 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1469 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1470 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1471 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1472 /* need to be adapted? */ 1473 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1474 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1475 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1476 /* print info */ 1477 if (pcbddc->dbg_flag) { 1478 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1479 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1480 } 1481 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1482 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1483 } 1484 } 1485 if (neumann) { /* Neumann */ 1486 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1487 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1488 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1489 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1490 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1491 /* need to be adapted? */ 1492 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1493 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1494 /* print info */ 1495 if (pcbddc->dbg_flag) { 1496 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1497 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1498 } 1499 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1500 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1501 } 1502 } 1503 } 1504 /* free Neumann problem's matrix */ 1505 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1506 PetscFunctionReturn(0); 1507 } 1508 1509 #undef __FUNCT__ 1510 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1511 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1512 { 1513 PetscErrorCode ierr; 1514 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1515 1516 PetscFunctionBegin; 1517 if (applytranspose) { 1518 if (pcbddc->local_auxmat1) { 1519 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1520 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1521 } 1522 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1523 } else { 1524 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1525 if (pcbddc->local_auxmat1) { 1526 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1527 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1528 } 1529 } 1530 PetscFunctionReturn(0); 1531 } 1532 1533 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1534 #undef __FUNCT__ 1535 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1536 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1537 { 1538 PetscErrorCode ierr; 1539 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1540 PC_IS* pcis = (PC_IS*) (pc->data); 1541 const PetscScalar zero = 0.0; 1542 1543 PetscFunctionBegin; 1544 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1545 if (applytranspose) { 1546 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1547 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1548 } else { 1549 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1550 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1551 } 1552 /* start communications from local primal nodes to rhs of coarse solver */ 1553 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1554 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1555 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1556 1557 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1558 /* TODO remove null space when doing multilevel */ 1559 if (pcbddc->coarse_ksp) { 1560 if (applytranspose) { 1561 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1562 } else { 1563 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1564 } 1565 } 1566 1567 /* Local solution on R nodes */ 1568 if (pcis->n) { 1569 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1570 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1571 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1572 if (pcbddc->switch_static) { 1573 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1574 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1575 } 1576 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1577 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1578 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1579 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1580 if (pcbddc->switch_static) { 1581 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1582 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1583 } 1584 } 1585 1586 /* communications from coarse sol to local primal nodes */ 1587 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1588 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1589 1590 /* Sum contributions from two levels */ 1591 if (applytranspose) { 1592 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1593 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1594 } else { 1595 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1596 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1597 } 1598 PetscFunctionReturn(0); 1599 } 1600 1601 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1604 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1605 { 1606 PetscErrorCode ierr; 1607 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1608 PetscScalar *array,*array2; 1609 Vec from,to; 1610 1611 PetscFunctionBegin; 1612 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1613 from = pcbddc->coarse_vec; 1614 to = pcbddc->vec1_P; 1615 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1616 Vec tvec; 1617 PetscInt lsize; 1618 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1619 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1620 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1621 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1622 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1623 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1624 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1625 } 1626 } else { /* from local to global -> put data in coarse right hand side */ 1627 from = pcbddc->vec1_P; 1628 to = pcbddc->coarse_vec; 1629 } 1630 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1631 PetscFunctionReturn(0); 1632 } 1633 1634 #undef __FUNCT__ 1635 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1636 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1637 { 1638 PetscErrorCode ierr; 1639 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1640 PetscScalar *array,*array2; 1641 Vec from,to; 1642 1643 PetscFunctionBegin; 1644 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1645 from = pcbddc->coarse_vec; 1646 to = pcbddc->vec1_P; 1647 } else { /* from local to global -> put data in coarse right hand side */ 1648 from = pcbddc->vec1_P; 1649 to = pcbddc->coarse_vec; 1650 } 1651 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1652 if (smode == SCATTER_FORWARD) { 1653 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1654 Vec tvec; 1655 PetscInt lsize; 1656 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1657 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1658 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1659 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1660 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1661 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1662 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1663 } 1664 } 1665 PetscFunctionReturn(0); 1666 } 1667 1668 /* uncomment for testing purposes */ 1669 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1670 #undef __FUNCT__ 1671 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1672 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1673 { 1674 PetscErrorCode ierr; 1675 PC_IS* pcis = (PC_IS*)(pc->data); 1676 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1677 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1678 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1679 MatType impMatType=MATSEQAIJ; 1680 /* one and zero */ 1681 PetscScalar one=1.0,zero=0.0; 1682 /* space to store constraints and their local indices */ 1683 PetscScalar *temp_quadrature_constraint; 1684 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1685 /* iterators */ 1686 PetscInt i,j,k,total_counts,temp_start_ptr; 1687 /* stuff to store connected components stored in pcbddc->mat_graph */ 1688 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1689 PetscInt n_ISForFaces,n_ISForEdges; 1690 /* near null space stuff */ 1691 MatNullSpace nearnullsp; 1692 const Vec *nearnullvecs; 1693 Vec *localnearnullsp; 1694 PetscBool nnsp_has_cnst; 1695 PetscInt nnsp_size; 1696 PetscScalar *array; 1697 /* BLAS integers */ 1698 PetscBLASInt lwork,lierr; 1699 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1700 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1701 /* LAPACK working arrays for SVD or POD */ 1702 PetscBool skip_lapack; 1703 PetscScalar *work; 1704 PetscReal *singular_vals; 1705 #if defined(PETSC_USE_COMPLEX) 1706 PetscReal *rwork; 1707 #endif 1708 #if defined(PETSC_MISSING_LAPACK_GESVD) 1709 PetscBLASInt Blas_one_2=1; 1710 PetscScalar *temp_basis,*correlation_mat; 1711 #else 1712 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1713 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1714 #endif 1715 /* reuse */ 1716 PetscInt olocal_primal_size; 1717 PetscInt *oprimal_indices_local_idxs; 1718 /* change of basis */ 1719 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1720 PetscBool boolforchange,qr_needed; 1721 PetscBT touched,change_basis,qr_needed_idx; 1722 /* auxiliary stuff */ 1723 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1724 PetscInt ncc,*gidxs,*permutation,*temp_indices_to_constraint_work; 1725 PetscScalar *temp_quadrature_constraint_work; 1726 /* some quantities */ 1727 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1728 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1729 1730 1731 PetscFunctionBegin; 1732 /* Destroy Mat objects computed previously */ 1733 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1734 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1735 /* TODO synch with adaptive selection */ 1736 /* Get index sets for faces, edges and vertices from graph */ 1737 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1738 /* free unneeded index sets */ 1739 if (!pcbddc->use_vertices) { 1740 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1741 } 1742 if (!pcbddc->use_edges) { 1743 for (i=0;i<n_ISForEdges;i++) { 1744 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1745 } 1746 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1747 n_ISForEdges = 0; 1748 } 1749 if (!pcbddc->use_faces) { 1750 for (i=0;i<n_ISForFaces;i++) { 1751 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1752 } 1753 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1754 n_ISForFaces = 0; 1755 } 1756 /* HACKS (the following two blocks of code) */ 1757 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1758 pcbddc->use_change_of_basis = PETSC_TRUE; 1759 if (!ISForEdges) { 1760 pcbddc->use_change_on_faces = PETSC_TRUE; 1761 } 1762 } 1763 if (pcbddc->NullSpace) { 1764 /* use_change_of_basis should be consistent among processors */ 1765 PetscBool tbool[2],gbool[2]; 1766 tbool [0] = pcbddc->use_change_of_basis; 1767 tbool [1] = pcbddc->use_change_on_faces; 1768 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1769 pcbddc->use_change_of_basis = gbool[0]; 1770 pcbddc->use_change_on_faces = gbool[1]; 1771 } 1772 /* print some info */ 1773 if (pcbddc->dbg_flag) { 1774 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1775 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1776 i = 0; 1777 if (ISForVertices) { 1778 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1779 } 1780 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1781 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1782 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1783 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1784 } 1785 1786 if (!pcbddc->adaptive_selection) { 1787 /* check if near null space is attached to global mat */ 1788 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1789 if (nearnullsp) { 1790 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1791 /* remove any stored info */ 1792 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1793 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1794 /* store information for BDDC solver reuse */ 1795 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1796 pcbddc->onearnullspace = nearnullsp; 1797 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1798 for (i=0;i<nnsp_size;i++) { 1799 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1800 } 1801 } else { /* if near null space is not provided BDDC uses constants by default */ 1802 nnsp_size = 0; 1803 nnsp_has_cnst = PETSC_TRUE; 1804 } 1805 /* get max number of constraints on a single cc */ 1806 max_constraints = nnsp_size; 1807 if (nnsp_has_cnst) max_constraints++; 1808 1809 /* 1810 Evaluate maximum storage size needed by the procedure 1811 - temp_indices will contain start index of each constraint stored as follows 1812 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1813 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1814 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1815 */ 1816 total_counts = n_ISForFaces+n_ISForEdges; 1817 total_counts *= max_constraints; 1818 n_vertices = 0; 1819 if (ISForVertices) { 1820 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1821 } 1822 total_counts += n_vertices; 1823 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1824 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1825 total_counts = 0; 1826 max_size_of_constraint = 0; 1827 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1828 if (i<n_ISForEdges) { 1829 used_IS = &ISForEdges[i]; 1830 } else { 1831 used_IS = &ISForFaces[i-n_ISForEdges]; 1832 } 1833 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1834 total_counts += j; 1835 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1836 } 1837 total_counts *= max_constraints; 1838 total_counts += n_vertices; 1839 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1840 /* get local part of global near null space vectors */ 1841 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1842 for (k=0;k<nnsp_size;k++) { 1843 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1844 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1845 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1846 } 1847 1848 /* whether or not to skip lapack calls */ 1849 skip_lapack = PETSC_TRUE; 1850 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1851 1852 /* allocate some auxiliary stuff */ 1853 if (!skip_lapack || pcbddc->use_qr_single) { 1854 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1855 } 1856 1857 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1858 if (!skip_lapack) { 1859 PetscScalar temp_work; 1860 1861 #if defined(PETSC_MISSING_LAPACK_GESVD) 1862 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1863 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1864 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1865 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1866 #if defined(PETSC_USE_COMPLEX) 1867 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1868 #endif 1869 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1870 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1871 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1872 lwork = -1; 1873 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1874 #if !defined(PETSC_USE_COMPLEX) 1875 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1876 #else 1877 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1878 #endif 1879 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1880 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1881 #else /* on missing GESVD */ 1882 /* SVD */ 1883 PetscInt max_n,min_n; 1884 max_n = max_size_of_constraint; 1885 min_n = max_constraints; 1886 if (max_size_of_constraint < max_constraints) { 1887 min_n = max_size_of_constraint; 1888 max_n = max_constraints; 1889 } 1890 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1891 #if defined(PETSC_USE_COMPLEX) 1892 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1893 #endif 1894 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1895 lwork = -1; 1896 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1897 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1898 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1899 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1900 #if !defined(PETSC_USE_COMPLEX) 1901 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1902 #else 1903 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1904 #endif 1905 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1906 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1907 #endif /* on missing GESVD */ 1908 /* Allocate optimal workspace */ 1909 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1910 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1911 } 1912 /* Now we can loop on constraining sets */ 1913 total_counts = 0; 1914 temp_indices[0] = 0; 1915 /* vertices */ 1916 if (ISForVertices) { 1917 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1918 if (nnsp_has_cnst) { /* consider all vertices */ 1919 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1920 for (i=0;i<n_vertices;i++) { 1921 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1922 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1923 total_counts++; 1924 } 1925 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1926 PetscBool used_vertex; 1927 for (i=0;i<n_vertices;i++) { 1928 used_vertex = PETSC_FALSE; 1929 k = 0; 1930 while (!used_vertex && k<nnsp_size) { 1931 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1932 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1933 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1934 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1935 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1936 total_counts++; 1937 used_vertex = PETSC_TRUE; 1938 } 1939 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1940 k++; 1941 } 1942 } 1943 } 1944 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1945 n_vertices = total_counts; 1946 } 1947 1948 /* edges and faces */ 1949 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1950 if (ncc<n_ISForEdges) { 1951 used_IS = &ISForEdges[ncc]; 1952 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1953 } else { 1954 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1955 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1956 } 1957 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1958 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1959 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1960 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1961 /* change of basis should not be performed on local periodic nodes */ 1962 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1963 if (nnsp_has_cnst) { 1964 PetscScalar quad_value; 1965 temp_constraints++; 1966 if (!pcbddc->use_nnsp_true) { 1967 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1968 } else { 1969 quad_value = 1.0; 1970 } 1971 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1972 for (j=0;j<size_of_constraint;j++) { 1973 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1974 } 1975 /* sort by global ordering if using lapack subroutines (not needed!) */ 1976 if (!skip_lapack || pcbddc->use_qr_single) { 1977 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1978 for (j=0;j<size_of_constraint;j++) { 1979 permutation[j]=j; 1980 } 1981 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1982 for (j=0;j<size_of_constraint;j++) { 1983 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 1984 } 1985 for (j=0;j<size_of_constraint;j++) { 1986 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1987 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1988 } 1989 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1990 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1991 } 1992 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1993 total_counts++; 1994 } 1995 for (k=0;k<nnsp_size;k++) { 1996 PetscReal real_value; 1997 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1998 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1999 for (j=0;j<size_of_constraint;j++) { 2000 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2001 } 2002 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2003 /* check if array is null on the connected component */ 2004 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2005 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2006 if (real_value > 0.0) { /* keep indices and values */ 2007 /* sort by global ordering if using lapack subroutines */ 2008 if (!skip_lapack || pcbddc->use_qr_single) { 2009 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2010 for (j=0;j<size_of_constraint;j++) { 2011 permutation[j]=j; 2012 } 2013 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2014 for (j=0;j<size_of_constraint;j++) { 2015 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2016 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2017 } 2018 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2019 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2020 } 2021 temp_constraints++; 2022 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2023 total_counts++; 2024 } 2025 } 2026 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2027 valid_constraints = temp_constraints; 2028 if (!pcbddc->use_nnsp_true && temp_constraints) { 2029 if (temp_constraints == 1) { /* just normalize the constraint */ 2030 PetscScalar norm; 2031 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2032 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2033 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2034 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2035 } else { /* perform SVD */ 2036 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2037 2038 #if defined(PETSC_MISSING_LAPACK_GESVD) 2039 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2040 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2041 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2042 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2043 from that computed using LAPACKgesvd 2044 -> This is due to a different computation of eigenvectors in LAPACKheev 2045 -> The quality of the POD-computed basis will be the same */ 2046 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2047 /* Store upper triangular part of correlation matrix */ 2048 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2049 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2050 for (j=0;j<temp_constraints;j++) { 2051 for (k=0;k<j+1;k++) { 2052 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 2053 } 2054 } 2055 /* compute eigenvalues and eigenvectors of correlation matrix */ 2056 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2057 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2058 #if !defined(PETSC_USE_COMPLEX) 2059 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2060 #else 2061 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2062 #endif 2063 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2064 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2065 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2066 j = 0; 2067 while (j < temp_constraints && singular_vals[j] < tol) j++; 2068 total_counts = total_counts-j; 2069 valid_constraints = temp_constraints-j; 2070 /* scale and copy POD basis into used quadrature memory */ 2071 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2072 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2073 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2074 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2075 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2076 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2077 if (j<temp_constraints) { 2078 PetscInt ii; 2079 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2080 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2081 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2082 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2083 for (k=0;k<temp_constraints-j;k++) { 2084 for (ii=0;ii<size_of_constraint;ii++) { 2085 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2086 } 2087 } 2088 } 2089 #else /* on missing GESVD */ 2090 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2091 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2092 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2093 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2094 #if !defined(PETSC_USE_COMPLEX) 2095 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 2096 #else 2097 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 2098 #endif 2099 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2100 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2101 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2102 k = temp_constraints; 2103 if (k > size_of_constraint) k = size_of_constraint; 2104 j = 0; 2105 while (j < k && singular_vals[k-j-1] < tol) j++; 2106 valid_constraints = k-j; 2107 total_counts = total_counts-temp_constraints+valid_constraints; 2108 #endif /* on missing GESVD */ 2109 } 2110 } 2111 /* setting change_of_basis flag is safe now */ 2112 if (boolforchange) { 2113 for (j=0;j<valid_constraints;j++) { 2114 PetscBTSet(change_basis,total_counts-j-1); 2115 } 2116 } 2117 } 2118 /* free workspace */ 2119 if (!skip_lapack || pcbddc->use_qr_single) { 2120 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2121 } 2122 if (!skip_lapack) { 2123 ierr = PetscFree(work);CHKERRQ(ierr); 2124 #if defined(PETSC_USE_COMPLEX) 2125 ierr = PetscFree(rwork);CHKERRQ(ierr); 2126 #endif 2127 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2128 #if defined(PETSC_MISSING_LAPACK_GESVD) 2129 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2130 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2131 #endif 2132 } 2133 for (k=0;k<nnsp_size;k++) { 2134 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2135 } 2136 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2137 } else { 2138 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2139 PetscInt cum = 0; 2140 2141 total_counts = 0; 2142 n_vertices = 0; 2143 if (sub_schurs->is_Ej_com) { 2144 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2145 } 2146 max_constraints = 0; 2147 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2148 total_counts += pcbddc->adaptive_constraints_n[i]; 2149 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2150 } 2151 2152 temp_indices = pcbddc->adaptive_constraints_ptrs; 2153 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2154 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2155 2156 #if 0 2157 printf("Found %d totals\n",total_counts); 2158 for (i=0;i<total_counts;i++) { 2159 printf("const %d, start %d",i,temp_indices[i]); 2160 printf(" end %d:\n",temp_indices[i+1]); 2161 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2162 printf(" idxs %d",temp_indices_to_constraint[j]); 2163 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2164 } 2165 } 2166 #endif 2167 2168 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2169 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2170 /* Change of basis */ 2171 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2172 cum = 0; 2173 if (pcbddc->use_change_of_basis) { 2174 for (i=0;i<sub_schurs->n_subs;i++) { 2175 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2176 for (j=0;j<pcbddc->adaptive_constraints_n[i];j++) { 2177 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2178 } 2179 } 2180 cum += pcbddc->adaptive_constraints_n[i]; 2181 } 2182 } 2183 } 2184 2185 /* free index sets of faces, edges and vertices */ 2186 for (i=0;i<n_ISForFaces;i++) { 2187 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2188 } 2189 if (n_ISForFaces) { 2190 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2191 } 2192 for (i=0;i<n_ISForEdges;i++) { 2193 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2194 } 2195 if (n_ISForEdges) { 2196 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2197 } 2198 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2199 2200 /* map temp_indices_to_constraint in boundary numbering */ 2201 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2202 if (i != temp_indices[total_counts]) { 2203 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2204 } 2205 2206 /* set quantities in pcbddc data structure and store previous primal size */ 2207 /* n_vertices defines the number of subdomain corners in the primal space */ 2208 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2209 olocal_primal_size = pcbddc->local_primal_size; 2210 pcbddc->local_primal_size = total_counts; 2211 pcbddc->n_vertices = n_vertices; 2212 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2213 2214 /* Create constraint matrix */ 2215 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2216 /* so we need to set it up properly either with or without change of basis */ 2217 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2218 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2219 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2220 /* array to compute a local numbering of constraints : vertices first then constraints */ 2221 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2222 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2223 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2224 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2225 /* auxiliary stuff for basis change */ 2226 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2227 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2228 2229 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2230 total_primal_vertices=0; 2231 for (i=0;i<pcbddc->local_primal_size;i++) { 2232 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2233 if (size_of_constraint == 1) { 2234 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2235 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2236 aux_primal_minloc[total_primal_vertices]=0; 2237 total_primal_vertices++; 2238 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2239 PetscInt min_loc,min_index; 2240 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2241 /* find first untouched local node */ 2242 k = 0; 2243 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2244 min_index = global_indices[k]; 2245 min_loc = k; 2246 /* search the minimum among global nodes already untouched on the cc */ 2247 for (k=1;k<size_of_constraint;k++) { 2248 /* there can be more than one constraint on a single connected component */ 2249 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2250 min_index = global_indices[k]; 2251 min_loc = k; 2252 } 2253 } 2254 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2255 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2256 aux_primal_minloc[total_primal_vertices]=min_loc; 2257 total_primal_vertices++; 2258 } 2259 } 2260 /* determine if a QR strategy is needed for change of basis */ 2261 qr_needed = PETSC_FALSE; 2262 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2263 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2264 if (PetscBTLookup(change_basis,i)) { 2265 if (!pcbddc->use_qr_single) { 2266 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2267 j = 0; 2268 for (k=0;k<size_of_constraint;k++) { 2269 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2270 j++; 2271 } 2272 } 2273 /* found more than one primal dof on the cc */ 2274 if (j > 1) { 2275 PetscBTSet(qr_needed_idx,i); 2276 qr_needed = PETSC_TRUE; 2277 } 2278 } else { 2279 PetscBTSet(qr_needed_idx,i); 2280 qr_needed = PETSC_TRUE; 2281 } 2282 } 2283 } 2284 /* free workspace */ 2285 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2286 2287 /* permute indices in order to have a sorted set of vertices */ 2288 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2289 2290 /* nonzero structure of constraint matrix */ 2291 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2292 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2293 j=total_primal_vertices; 2294 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2295 if (!PetscBTLookup(change_basis,i)) { 2296 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2297 j++; 2298 } 2299 } 2300 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2301 ierr = PetscFree(nnz);CHKERRQ(ierr); 2302 /* set values in constraint matrix */ 2303 for (i=0;i<total_primal_vertices;i++) { 2304 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2305 } 2306 total_counts = total_primal_vertices; 2307 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2308 if (!PetscBTLookup(change_basis,i)) { 2309 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2310 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2311 total_counts++; 2312 } 2313 } 2314 /* assembling */ 2315 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2316 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2317 /* 2318 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2319 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2320 */ 2321 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2322 if (pcbddc->use_change_of_basis) { 2323 /* dual and primal dofs on a single cc */ 2324 PetscInt dual_dofs,primal_dofs; 2325 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2326 PetscInt primal_counter; 2327 /* working stuff for GEQRF */ 2328 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2329 PetscBLASInt lqr_work; 2330 /* working stuff for UNGQR */ 2331 PetscScalar *gqr_work,lgqr_work_t; 2332 PetscBLASInt lgqr_work; 2333 /* working stuff for TRTRS */ 2334 PetscScalar *trs_rhs; 2335 PetscBLASInt Blas_NRHS; 2336 /* pointers for values insertion into change of basis matrix */ 2337 PetscInt *start_rows,*start_cols; 2338 PetscScalar *start_vals; 2339 /* working stuff for values insertion */ 2340 PetscBT is_primal; 2341 /* matrix sizes */ 2342 PetscInt global_size,local_size; 2343 /* work array for nonzeros */ 2344 PetscScalar *nnz_array; 2345 /* temporary change of basis */ 2346 Mat localChangeOfBasisMatrix; 2347 /* auxiliary work for global change of basis */ 2348 Vec nnz_vec; 2349 PetscInt *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz; 2350 PetscInt nvtxs,*xadj,*adjncy,*idxs_mapped; 2351 PetscScalar *vals; 2352 PetscBool done; 2353 2354 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2355 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2356 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2357 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2358 2359 /* nonzeros for local mat */ 2360 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 2361 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 2362 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2363 if (PetscBTLookup(change_basis,i)) { 2364 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2365 if (PetscBTLookup(qr_needed_idx,i)) { 2366 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2367 } else { 2368 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 2369 /* get local primal index on the cc */ 2370 j = 0; 2371 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2372 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2373 } 2374 } 2375 } 2376 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2377 /* Set initial identity in the matrix */ 2378 for (i=0;i<pcis->n_B;i++) { 2379 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2380 } 2381 2382 if (pcbddc->dbg_flag) { 2383 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2384 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2385 } 2386 2387 2388 /* Now we loop on the constraints which need a change of basis */ 2389 /* 2390 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2391 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2392 2393 Basic blocks of change of basis matrix T computed by 2394 2395 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2396 2397 | 1 0 ... 0 s_1/S | 2398 | 0 1 ... 0 s_2/S | 2399 | ... | 2400 | 0 ... 1 s_{n-1}/S | 2401 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2402 2403 with S = \sum_{i=1}^n s_i^2 2404 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2405 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2406 2407 - QR decomposition of constraints otherwise 2408 */ 2409 if (qr_needed) { 2410 /* space to store Q */ 2411 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2412 /* first we issue queries for optimal work */ 2413 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2414 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2415 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2416 lqr_work = -1; 2417 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2418 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2419 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2420 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2421 lgqr_work = -1; 2422 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2423 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2424 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2425 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2426 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2427 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2428 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2429 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2430 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2431 /* array to store scaling factors for reflectors */ 2432 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2433 /* array to store rhs and solution of triangular solver */ 2434 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2435 /* allocating workspace for check */ 2436 if (pcbddc->dbg_flag) { 2437 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2438 } 2439 } 2440 /* array to store whether a node is primal or not */ 2441 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2442 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2443 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2444 if (i != total_primal_vertices) { 2445 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2446 } 2447 for (i=0;i<total_primal_vertices;i++) { 2448 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2449 } 2450 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2451 2452 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2453 /* -> using implicit ordering contained in temp_indices data */ 2454 total_counts = pcbddc->n_vertices; 2455 primal_counter = total_counts; 2456 while (total_counts<pcbddc->local_primal_size) { 2457 primal_dofs = 1; 2458 if (PetscBTLookup(change_basis,total_counts)) { 2459 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2460 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 2461 primal_dofs++; 2462 } 2463 /* get constraint info */ 2464 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2465 dual_dofs = size_of_constraint-primal_dofs; 2466 2467 if (pcbddc->dbg_flag) { 2468 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2469 } 2470 2471 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2472 2473 /* copy quadrature constraints for change of basis check */ 2474 if (pcbddc->dbg_flag) { 2475 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2476 } 2477 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2478 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2479 2480 /* compute QR decomposition of constraints */ 2481 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2482 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2483 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2484 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2485 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2486 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2487 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2488 2489 /* explictly compute R^-T */ 2490 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2491 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2492 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2493 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2494 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2495 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2496 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2497 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2498 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2499 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2500 2501 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2502 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2503 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2504 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2505 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2506 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2507 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2508 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2509 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2510 2511 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2512 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2513 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2514 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2515 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2516 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2517 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2518 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2519 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2520 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2521 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2522 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2523 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2524 2525 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2526 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2527 /* insert cols for primal dofs */ 2528 for (j=0;j<primal_dofs;j++) { 2529 start_vals = &qr_basis[j*size_of_constraint]; 2530 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2531 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2532 } 2533 /* insert cols for dual dofs */ 2534 for (j=0,k=0;j<dual_dofs;k++) { 2535 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2536 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2537 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2538 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2539 j++; 2540 } 2541 } 2542 2543 /* check change of basis */ 2544 if (pcbddc->dbg_flag) { 2545 PetscInt ii,jj; 2546 PetscBool valid_qr=PETSC_TRUE; 2547 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2548 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2549 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2550 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2551 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2552 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2553 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2554 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2555 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2556 for (jj=0;jj<size_of_constraint;jj++) { 2557 for (ii=0;ii<primal_dofs;ii++) { 2558 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2559 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2560 } 2561 } 2562 if (!valid_qr) { 2563 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2564 for (jj=0;jj<size_of_constraint;jj++) { 2565 for (ii=0;ii<primal_dofs;ii++) { 2566 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2567 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2568 } 2569 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2570 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2571 } 2572 } 2573 } 2574 } else { 2575 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2576 } 2577 } 2578 } else { /* simple transformation block */ 2579 PetscInt row,col; 2580 PetscScalar val,norm; 2581 2582 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2583 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2584 for (j=0;j<size_of_constraint;j++) { 2585 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2586 if (!PetscBTLookup(is_primal,row)) { 2587 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2588 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2589 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2590 } else { 2591 for (k=0;k<size_of_constraint;k++) { 2592 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2593 if (row != col) { 2594 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2595 } else { 2596 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2597 } 2598 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2599 } 2600 } 2601 } 2602 if (pcbddc->dbg_flag) { 2603 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2604 } 2605 } 2606 /* increment primal counter */ 2607 primal_counter += primal_dofs; 2608 } else { 2609 if (pcbddc->dbg_flag) { 2610 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2611 } 2612 } 2613 /* increment constraint counter total_counts */ 2614 total_counts += primal_dofs; 2615 } 2616 2617 /* free workspace */ 2618 if (qr_needed) { 2619 if (pcbddc->dbg_flag) { 2620 ierr = PetscFree(work);CHKERRQ(ierr); 2621 } 2622 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2623 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2624 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2625 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2626 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2627 } 2628 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2629 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2630 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2631 2632 /* assembling of global change of variable */ 2633 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2634 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2635 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2636 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2637 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2638 ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr); 2639 2640 /* nonzeros (overestimated) */ 2641 ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr); 2642 ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr); 2643 ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr); 2644 for (i=0;i<pcis->n;i++) { 2645 nnz_array[i] = 1.0; 2646 idxs_all[i] = i; 2647 } 2648 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2649 for (i=0;i<pcis->n_B;i++) { 2650 nnz_array[idxs_B[i]] = nnz[i]; 2651 } 2652 if (pcis->n) { 2653 ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr); 2654 } 2655 ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr); 2656 ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr); 2657 ierr = PetscFree(nnz);CHKERRQ(ierr); 2658 ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr); 2659 ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr); 2660 ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2661 for (i=0;i<local_size;i++) { 2662 d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size); 2663 o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size); 2664 } 2665 ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2666 ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr); 2667 ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2668 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2669 2670 /* Set identity on dirichlet dofs */ 2671 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2672 for (i=0;i<pcis->n-pcis->n_B;i++) { 2673 PetscScalar one=1.0; 2674 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr); 2675 } 2676 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2677 2678 /* Set values at interface dofs */ 2679 done = PETSC_TRUE; 2680 ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2681 if (!done) { 2682 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2683 } 2684 ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2685 ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr); 2686 ierr = ISLocalToGlobalMappingApply(pcis->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr); 2687 for (i=0;i<nvtxs;i++) { 2688 PetscInt row,*cols,ncols; 2689 PetscScalar *mat_vals; 2690 2691 row = idxs_B[i]; 2692 ncols = xadj[i+1]-xadj[i]; 2693 cols = idxs_mapped+xadj[i]; 2694 mat_vals = vals+xadj[i]; 2695 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr); 2696 } 2697 ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2698 if (!done) { 2699 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2700 } 2701 ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2702 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2703 ierr = PetscFree(idxs_mapped);CHKERRQ(ierr); 2704 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2705 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2706 2707 /* check */ 2708 if (pcbddc->dbg_flag) { 2709 PetscReal error; 2710 Vec x,x_change; 2711 2712 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2713 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2714 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2715 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2716 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2717 ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2718 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 2719 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2720 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2721 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2722 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2723 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2724 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2725 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr); 2726 ierr = VecDestroy(&x);CHKERRQ(ierr); 2727 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2728 } 2729 2730 /* adapt sub_schurs computed (if any) */ 2731 if (pcbddc->use_deluxe_scaling) { 2732 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2733 if (sub_schurs->n_subs_par_g) { 2734 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented"); 2735 } 2736 if (sub_schurs->S_Ej_all) { 2737 Mat S_1,S_2,tmat; 2738 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,sub_schurs->is_Ej_all,sub_schurs->is_Ej_all,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2739 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2740 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2741 sub_schurs->S_Ej_all = S_1; 2742 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2743 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2744 sub_schurs->sum_S_Ej_all = S_2; 2745 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2746 } 2747 } 2748 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2749 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2750 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2751 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2752 } 2753 2754 /* set up change of basis context */ 2755 if (pcbddc->ChangeOfBasisMatrix) { 2756 PCBDDCChange_ctx change_ctx; 2757 2758 if (!pcbddc->new_global_mat) { 2759 PetscInt global_size,local_size; 2760 2761 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2762 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2763 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2764 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2765 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2766 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2767 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2768 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2769 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2770 } else { 2771 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2772 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2773 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2774 } 2775 if (!pcbddc->user_ChangeOfBasisMatrix) { 2776 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2777 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2778 } else { 2779 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2780 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2781 } 2782 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2783 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2784 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2785 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2786 } 2787 2788 /* get indices in local ordering for vertices and constraints */ 2789 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2790 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2791 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2792 } 2793 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2794 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2795 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2796 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2797 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2798 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2799 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2800 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2801 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2802 /* set quantities in PCBDDC data struct */ 2803 pcbddc->n_actual_vertices = i; 2804 /* check if a new primal space has been introduced */ 2805 pcbddc->new_primal_space_local = PETSC_TRUE; 2806 if (olocal_primal_size == pcbddc->local_primal_size) { 2807 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2808 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2809 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2810 } 2811 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2812 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2813 2814 /* flush dbg viewer */ 2815 if (pcbddc->dbg_flag) { 2816 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2817 } 2818 2819 /* free workspace */ 2820 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2821 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2822 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2823 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2824 if (!pcbddc->adaptive_selection) { 2825 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2826 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2827 } else { 2828 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2829 pcbddc->adaptive_constraints_ptrs, 2830 pcbddc->adaptive_constraints_idxs, 2831 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2832 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2833 } 2834 PetscFunctionReturn(0); 2835 } 2836 2837 #undef __FUNCT__ 2838 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2839 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2840 { 2841 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2842 PC_IS *pcis = (PC_IS*)pc->data; 2843 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2844 PetscInt ierr,i,vertex_size; 2845 PetscViewer viewer=pcbddc->dbg_viewer; 2846 2847 PetscFunctionBegin; 2848 /* Reset previously computed graph */ 2849 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2850 /* Init local Graph struct */ 2851 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2852 2853 /* Check validity of the csr graph passed in by the user */ 2854 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2855 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2856 } 2857 2858 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2859 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2860 Mat mat_adj; 2861 PetscInt *xadj,*adjncy; 2862 PetscInt nvtxs; 2863 PetscBool flg_row=PETSC_TRUE; 2864 2865 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2866 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2867 if (!flg_row) { 2868 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2869 } 2870 if (pcbddc->use_local_adj) { 2871 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2872 pcbddc->computed_rowadj = PETSC_TRUE; 2873 } else { /* just compute subdomain's connected components */ 2874 IS is_dummy; 2875 ISLocalToGlobalMapping l2gmap_dummy; 2876 PetscInt j,sum; 2877 PetscInt *cxadj,*cadjncy; 2878 const PetscInt *idxs; 2879 PCBDDCGraph graph; 2880 PetscBT is_on_boundary; 2881 2882 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2883 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2884 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2885 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2886 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2887 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2888 graph->xadj = xadj; 2889 graph->adjncy = adjncy; 2890 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2891 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2892 2893 if (pcbddc->dbg_flag) { 2894 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2895 for (i=0;i<graph->ncc;i++) { 2896 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2897 } 2898 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2899 } 2900 2901 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2902 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2903 for (i=0;i<pcis->n_B;i++) { 2904 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2905 } 2906 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2907 2908 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2909 sum = 0; 2910 for (i=0;i<graph->ncc;i++) { 2911 PetscInt sizecc = 0; 2912 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2913 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2914 sizecc++; 2915 } 2916 } 2917 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2918 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2919 cxadj[graph->queue[j]] = sizecc; 2920 } 2921 } 2922 sum += sizecc*sizecc; 2923 } 2924 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2925 sum = 0; 2926 for (i=0;i<nvtxs;i++) { 2927 PetscInt temp = cxadj[i]; 2928 cxadj[i] = sum; 2929 sum += temp; 2930 } 2931 cxadj[nvtxs] = sum; 2932 for (i=0;i<graph->ncc;i++) { 2933 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2934 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2935 PetscInt k,sizecc = 0; 2936 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2937 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2938 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2939 sizecc++; 2940 } 2941 } 2942 } 2943 } 2944 } 2945 if (nvtxs) { 2946 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2947 } else { 2948 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2949 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2950 } 2951 graph->xadj = 0; 2952 graph->adjncy = 0; 2953 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2954 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2955 } 2956 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2957 if (!flg_row) { 2958 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2959 } 2960 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2961 } 2962 2963 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2964 vertex_size = 1; 2965 if (pcbddc->user_provided_isfordofs) { 2966 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2967 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2968 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2969 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2970 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2971 } 2972 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2973 pcbddc->n_ISForDofs = 0; 2974 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2975 } 2976 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2977 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2978 } else { 2979 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2980 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2981 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2982 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2983 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2984 } 2985 } 2986 } 2987 2988 /* Setup of Graph */ 2989 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2990 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2991 } 2992 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2993 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2994 } 2995 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2996 2997 /* Graph's connected components analysis */ 2998 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2999 3000 /* print some info to stdout */ 3001 if (pcbddc->dbg_flag) { 3002 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 3003 } 3004 3005 /* mark topography has done */ 3006 pcbddc->recompute_topography = PETSC_FALSE; 3007 PetscFunctionReturn(0); 3008 } 3009 3010 #undef __FUNCT__ 3011 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 3012 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 3013 { 3014 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3015 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 3016 PetscErrorCode ierr; 3017 3018 PetscFunctionBegin; 3019 n = 0; 3020 vertices = 0; 3021 if (pcbddc->ConstraintMatrix) { 3022 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 3023 for (i=0;i<local_primal_size;i++) { 3024 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3025 if (size_of_constraint == 1) n++; 3026 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3027 } 3028 if (vertices_idx) { 3029 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 3030 n = 0; 3031 for (i=0;i<local_primal_size;i++) { 3032 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3033 if (size_of_constraint == 1) { 3034 vertices[n++]=row_cmat_indices[0]; 3035 } 3036 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3037 } 3038 } 3039 } 3040 *n_vertices = n; 3041 if (vertices_idx) *vertices_idx = vertices; 3042 PetscFunctionReturn(0); 3043 } 3044 3045 #undef __FUNCT__ 3046 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 3047 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 3048 { 3049 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3050 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 3051 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 3052 PetscBT touched; 3053 PetscErrorCode ierr; 3054 3055 /* This function assumes that the number of local constraints per connected component 3056 is not greater than the number of nodes defined for the connected component 3057 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3058 PetscFunctionBegin; 3059 n = 0; 3060 constraints_index = 0; 3061 if (pcbddc->ConstraintMatrix) { 3062 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3063 max_size_of_constraint = 0; 3064 for (i=0;i<local_primal_size;i++) { 3065 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3066 if (size_of_constraint > 1) { 3067 n++; 3068 } 3069 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3070 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3071 } 3072 if (constraints_idx) { 3073 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3074 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3075 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3076 n = 0; 3077 for (i=0;i<local_primal_size;i++) { 3078 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3079 if (size_of_constraint > 1) { 3080 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3081 /* find first untouched local node */ 3082 j = 0; 3083 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3084 min_index = row_cmat_global_indices[j]; 3085 min_loc = j; 3086 /* search the minimum among nodes not yet touched on the connected component 3087 since there can be more than one constraint on a single cc */ 3088 for (j=1;j<size_of_constraint;j++) { 3089 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3090 min_index = row_cmat_global_indices[j]; 3091 min_loc = j; 3092 } 3093 } 3094 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3095 constraints_index[n++] = row_cmat_indices[min_loc]; 3096 } 3097 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3098 } 3099 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3100 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3101 } 3102 } 3103 *n_constraints = n; 3104 if (constraints_idx) *constraints_idx = constraints_index; 3105 PetscFunctionReturn(0); 3106 } 3107 3108 #undef __FUNCT__ 3109 #define __FUNCT__ "PCBDDCSubsetNumbering" 3110 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3111 { 3112 Vec local_vec,global_vec; 3113 IS seqis,paris; 3114 VecScatter scatter_ctx; 3115 PetscScalar *array; 3116 PetscInt *temp_global_dofs; 3117 PetscScalar globalsum; 3118 PetscInt i,j,s; 3119 PetscInt nlocals,first_index,old_index,max_local; 3120 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 3121 PetscMPIInt *dof_sizes,*dof_displs; 3122 PetscBool first_found; 3123 PetscErrorCode ierr; 3124 3125 PetscFunctionBegin; 3126 /* mpi buffers */ 3127 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3128 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3129 j = ( !rank_prec_comm ? size_prec_comm : 0); 3130 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 3131 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 3132 /* get maximum size of subset */ 3133 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3134 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3135 max_local = 0; 3136 for (i=0;i<n_local_dofs;i++) { 3137 if (max_local < temp_global_dofs[i] ) { 3138 max_local = temp_global_dofs[i]; 3139 } 3140 } 3141 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3142 max_global++; 3143 max_local = 0; 3144 for (i=0;i<n_local_dofs;i++) { 3145 if (max_local < local_dofs[i] ) { 3146 max_local = local_dofs[i]; 3147 } 3148 } 3149 max_local++; 3150 /* allocate workspace */ 3151 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3152 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3153 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3154 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3155 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3156 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3157 /* create scatter */ 3158 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3159 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3160 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3161 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3162 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3163 /* init array */ 3164 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3165 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3166 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3167 if (local_dofs_mult) { 3168 for (i=0;i<n_local_dofs;i++) { 3169 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3170 } 3171 } else { 3172 for (i=0;i<n_local_dofs;i++) { 3173 array[local_dofs[i]]=1.0; 3174 } 3175 } 3176 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3177 /* scatter into global vec and get total number of global dofs */ 3178 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3179 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3180 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3181 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3182 /* Fill global_vec with cumulative function for global numbering */ 3183 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3184 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3185 nlocals = 0; 3186 first_index = -1; 3187 first_found = PETSC_FALSE; 3188 for (i=0;i<s;i++) { 3189 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3190 first_found = PETSC_TRUE; 3191 first_index = i; 3192 } 3193 nlocals += (PetscInt)PetscRealPart(array[i]); 3194 } 3195 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3196 if (!rank_prec_comm) { 3197 dof_displs[0]=0; 3198 for (i=1;i<size_prec_comm;i++) { 3199 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3200 } 3201 } 3202 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3203 if (first_found) { 3204 array[first_index] += (PetscScalar)nlocals; 3205 old_index = first_index; 3206 for (i=first_index+1;i<s;i++) { 3207 if (PetscRealPart(array[i]) > 0.1) { 3208 array[i] += array[old_index]; 3209 old_index = i; 3210 } 3211 } 3212 } 3213 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3214 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3215 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3216 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3217 /* get global ordering of local dofs */ 3218 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3219 if (local_dofs_mult) { 3220 for (i=0;i<n_local_dofs;i++) { 3221 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3222 } 3223 } else { 3224 for (i=0;i<n_local_dofs;i++) { 3225 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3226 } 3227 } 3228 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3229 /* free workspace */ 3230 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3231 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3232 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3233 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3234 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3235 /* return pointer to global ordering of local dofs */ 3236 *global_numbering_subset = temp_global_dofs; 3237 PetscFunctionReturn(0); 3238 } 3239 3240 #undef __FUNCT__ 3241 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3242 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3243 { 3244 PetscInt i,j; 3245 PetscScalar *alphas; 3246 PetscErrorCode ierr; 3247 3248 PetscFunctionBegin; 3249 /* this implements stabilized Gram-Schmidt */ 3250 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3251 for (i=0;i<n;i++) { 3252 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3253 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3254 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3255 } 3256 ierr = PetscFree(alphas);CHKERRQ(ierr); 3257 PetscFunctionReturn(0); 3258 } 3259 3260 #undef __FUNCT__ 3261 #define __FUNCT__ "MatISGetSubassemblingPattern" 3262 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3263 { 3264 Mat subdomain_adj; 3265 IS new_ranks,ranks_send_to; 3266 MatPartitioning partitioner; 3267 Mat_IS *matis; 3268 PetscInt n_neighs,*neighs,*n_shared,**shared; 3269 PetscInt prank; 3270 PetscMPIInt size,rank,color; 3271 PetscInt *xadj,*adjncy,*oldranks; 3272 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3273 PetscInt i,local_size,threshold=0; 3274 PetscErrorCode ierr; 3275 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3276 PetscSubcomm subcomm; 3277 3278 PetscFunctionBegin; 3279 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3280 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3281 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3282 3283 /* Get info on mapping */ 3284 matis = (Mat_IS*)(mat->data); 3285 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3286 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3287 3288 /* build local CSR graph of subdomains' connectivity */ 3289 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3290 xadj[0] = 0; 3291 xadj[1] = PetscMax(n_neighs-1,0); 3292 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3293 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3294 3295 if (threshold) { 3296 PetscInt xadj_count = 0; 3297 for (i=1;i<n_neighs;i++) { 3298 if (n_shared[i] > threshold) { 3299 adjncy[xadj_count] = neighs[i]; 3300 adjncy_wgt[xadj_count] = n_shared[i]; 3301 xadj_count++; 3302 } 3303 } 3304 xadj[1] = xadj_count; 3305 } else { 3306 if (xadj[1]) { 3307 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3308 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3309 } 3310 } 3311 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3312 if (use_square) { 3313 for (i=0;i<xadj[1];i++) { 3314 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3315 } 3316 } 3317 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3318 3319 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3320 3321 /* 3322 Restrict work on active processes only. 3323 */ 3324 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3325 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3326 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3327 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3328 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3329 if (color) { 3330 ierr = PetscFree(xadj);CHKERRQ(ierr); 3331 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3332 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3333 } else { 3334 PetscInt coarsening_ratio; 3335 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 3336 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3337 prank = rank; 3338 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 3339 /* 3340 for (i=0;i<size;i++) { 3341 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3342 } 3343 */ 3344 for (i=0;i<xadj[1];i++) { 3345 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3346 } 3347 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3348 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3349 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3350 3351 /* Partition */ 3352 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 3353 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3354 if (use_vwgt) { 3355 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3356 v_wgt[0] = local_size; 3357 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3358 } 3359 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3360 coarsening_ratio = size/n_subdomains; 3361 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3362 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3363 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3364 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3365 3366 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3367 if (contiguous) { 3368 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3369 } else { 3370 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3371 } 3372 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3373 /* clean up */ 3374 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3375 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3376 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3377 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3378 } 3379 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3380 3381 /* assemble parallel IS for sends */ 3382 i = 1; 3383 if (color) i=0; 3384 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3385 3386 /* get back IS */ 3387 *is_sends = ranks_send_to; 3388 PetscFunctionReturn(0); 3389 } 3390 3391 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3392 3393 #undef __FUNCT__ 3394 #define __FUNCT__ "MatISSubassemble" 3395 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3396 { 3397 Mat local_mat; 3398 Mat_IS *matis; 3399 IS is_sends_internal; 3400 PetscInt rows,cols,new_local_rows; 3401 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3402 PetscBool ismatis,isdense,newisdense,destroy_mat; 3403 ISLocalToGlobalMapping l2gmap; 3404 PetscInt* l2gmap_indices; 3405 const PetscInt* is_indices; 3406 MatType new_local_type; 3407 /* buffers */ 3408 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3409 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3410 PetscInt *recv_buffer_idxs_local; 3411 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3412 /* MPI */ 3413 MPI_Comm comm,comm_n; 3414 PetscSubcomm subcomm; 3415 PetscMPIInt n_sends,n_recvs,commsize; 3416 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3417 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3418 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3419 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3420 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3421 PetscErrorCode ierr; 3422 3423 PetscFunctionBegin; 3424 /* TODO: add missing checks */ 3425 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3426 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3427 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3428 PetscValidLogicalCollectiveInt(mat,nis,7); 3429 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3430 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3431 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3432 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3433 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3434 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3435 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3436 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3437 PetscInt mrows,mcols,mnrows,mncols; 3438 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3439 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3440 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3441 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3442 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3443 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3444 } 3445 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3446 PetscValidLogicalCollectiveInt(mat,bs,0); 3447 /* prepare IS for sending if not provided */ 3448 if (!is_sends) { 3449 PetscBool pcontig = PETSC_TRUE; 3450 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3451 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3452 } else { 3453 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3454 is_sends_internal = is_sends; 3455 } 3456 3457 /* get pointer of MATIS data */ 3458 matis = (Mat_IS*)mat->data; 3459 3460 /* get comm */ 3461 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3462 3463 /* compute number of sends */ 3464 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3465 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3466 3467 /* compute number of receives */ 3468 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3469 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3470 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3471 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3472 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3473 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3474 ierr = PetscFree(iflags);CHKERRQ(ierr); 3475 3476 /* restrict comm if requested */ 3477 subcomm = 0; 3478 destroy_mat = PETSC_FALSE; 3479 if (restrict_comm) { 3480 PetscMPIInt color,rank,subcommsize; 3481 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3482 color = 0; 3483 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3484 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3485 subcommsize = commsize - subcommsize; 3486 /* check if reuse has been requested */ 3487 if (reuse == MAT_REUSE_MATRIX) { 3488 if (*mat_n) { 3489 PetscMPIInt subcommsize2; 3490 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3491 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3492 comm_n = PetscObjectComm((PetscObject)*mat_n); 3493 } else { 3494 comm_n = PETSC_COMM_SELF; 3495 } 3496 } else { /* MAT_INITIAL_MATRIX */ 3497 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3498 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3499 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3500 comm_n = subcomm->comm; 3501 } 3502 /* flag to destroy *mat_n if not significative */ 3503 if (color) destroy_mat = PETSC_TRUE; 3504 } else { 3505 comm_n = comm; 3506 } 3507 3508 /* prepare send/receive buffers */ 3509 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3510 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3511 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3512 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3513 if (nis) { 3514 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3515 } 3516 3517 /* Get data from local matrices */ 3518 if (!isdense) { 3519 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3520 /* TODO: See below some guidelines on how to prepare the local buffers */ 3521 /* 3522 send_buffer_vals should contain the raw values of the local matrix 3523 send_buffer_idxs should contain: 3524 - MatType_PRIVATE type 3525 - PetscInt size_of_l2gmap 3526 - PetscInt global_row_indices[size_of_l2gmap] 3527 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3528 */ 3529 } else { 3530 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3531 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3532 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3533 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3534 send_buffer_idxs[1] = i; 3535 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3536 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3537 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3538 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3539 for (i=0;i<n_sends;i++) { 3540 ilengths_vals[is_indices[i]] = len*len; 3541 ilengths_idxs[is_indices[i]] = len+2; 3542 } 3543 } 3544 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3545 /* additional is (if any) */ 3546 if (nis) { 3547 PetscMPIInt psum; 3548 PetscInt j; 3549 for (j=0,psum=0;j<nis;j++) { 3550 PetscInt plen; 3551 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3552 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3553 psum += len+1; /* indices + lenght */ 3554 } 3555 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3556 for (j=0,psum=0;j<nis;j++) { 3557 PetscInt plen; 3558 const PetscInt *is_array_idxs; 3559 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3560 send_buffer_idxs_is[psum] = plen; 3561 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3562 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3563 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3564 psum += plen+1; /* indices + lenght */ 3565 } 3566 for (i=0;i<n_sends;i++) { 3567 ilengths_idxs_is[is_indices[i]] = psum; 3568 } 3569 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3570 } 3571 3572 buf_size_idxs = 0; 3573 buf_size_vals = 0; 3574 buf_size_idxs_is = 0; 3575 for (i=0;i<n_recvs;i++) { 3576 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3577 buf_size_vals += (PetscInt)olengths_vals[i]; 3578 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3579 } 3580 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3581 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3582 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3583 3584 /* get new tags for clean communications */ 3585 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3586 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3587 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3588 3589 /* allocate for requests */ 3590 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3591 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3592 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3593 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3594 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3595 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3596 3597 /* communications */ 3598 ptr_idxs = recv_buffer_idxs; 3599 ptr_vals = recv_buffer_vals; 3600 ptr_idxs_is = recv_buffer_idxs_is; 3601 for (i=0;i<n_recvs;i++) { 3602 source_dest = onodes[i]; 3603 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3604 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3605 ptr_idxs += olengths_idxs[i]; 3606 ptr_vals += olengths_vals[i]; 3607 if (nis) { 3608 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3609 ptr_idxs_is += olengths_idxs_is[i]; 3610 } 3611 } 3612 for (i=0;i<n_sends;i++) { 3613 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3614 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3615 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3616 if (nis) { 3617 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3618 } 3619 } 3620 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3621 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3622 3623 /* assemble new l2g map */ 3624 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3625 ptr_idxs = recv_buffer_idxs; 3626 new_local_rows = 0; 3627 for (i=0;i<n_recvs;i++) { 3628 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3629 ptr_idxs += olengths_idxs[i]; 3630 } 3631 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3632 ptr_idxs = recv_buffer_idxs; 3633 new_local_rows = 0; 3634 for (i=0;i<n_recvs;i++) { 3635 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3636 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3637 ptr_idxs += olengths_idxs[i]; 3638 } 3639 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3640 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3641 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3642 3643 /* infer new local matrix type from received local matrices type */ 3644 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3645 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3646 if (n_recvs) { 3647 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3648 ptr_idxs = recv_buffer_idxs; 3649 for (i=0;i<n_recvs;i++) { 3650 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3651 new_local_type_private = MATAIJ_PRIVATE; 3652 break; 3653 } 3654 ptr_idxs += olengths_idxs[i]; 3655 } 3656 switch (new_local_type_private) { 3657 case MATDENSE_PRIVATE: 3658 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3659 new_local_type = MATSEQAIJ; 3660 bs = 1; 3661 } else { /* if I receive only 1 dense matrix */ 3662 new_local_type = MATSEQDENSE; 3663 bs = 1; 3664 } 3665 break; 3666 case MATAIJ_PRIVATE: 3667 new_local_type = MATSEQAIJ; 3668 bs = 1; 3669 break; 3670 case MATBAIJ_PRIVATE: 3671 new_local_type = MATSEQBAIJ; 3672 break; 3673 case MATSBAIJ_PRIVATE: 3674 new_local_type = MATSEQSBAIJ; 3675 break; 3676 default: 3677 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3678 break; 3679 } 3680 } else { /* by default, new_local_type is seqdense */ 3681 new_local_type = MATSEQDENSE; 3682 bs = 1; 3683 } 3684 3685 /* create MATIS object if needed */ 3686 if (reuse == MAT_INITIAL_MATRIX) { 3687 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3688 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3689 } else { 3690 /* it also destroys the local matrices */ 3691 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3692 } 3693 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3694 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3695 3696 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3697 3698 /* Global to local map of received indices */ 3699 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3700 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3701 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3702 3703 /* restore attributes -> type of incoming data and its size */ 3704 buf_size_idxs = 0; 3705 for (i=0;i<n_recvs;i++) { 3706 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3707 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3708 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3709 } 3710 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3711 3712 /* set preallocation */ 3713 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3714 if (!newisdense) { 3715 PetscInt *new_local_nnz=0; 3716 3717 ptr_vals = recv_buffer_vals; 3718 ptr_idxs = recv_buffer_idxs_local; 3719 if (n_recvs) { 3720 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3721 } 3722 for (i=0;i<n_recvs;i++) { 3723 PetscInt j; 3724 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3725 for (j=0;j<*(ptr_idxs+1);j++) { 3726 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3727 } 3728 } else { 3729 /* TODO */ 3730 } 3731 ptr_idxs += olengths_idxs[i]; 3732 } 3733 if (new_local_nnz) { 3734 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3735 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3736 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3737 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3738 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3739 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3740 } else { 3741 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3742 } 3743 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3744 } else { 3745 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3746 } 3747 3748 /* set values */ 3749 ptr_vals = recv_buffer_vals; 3750 ptr_idxs = recv_buffer_idxs_local; 3751 for (i=0;i<n_recvs;i++) { 3752 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3753 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3754 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3755 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3756 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3757 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3758 } else { 3759 /* TODO */ 3760 } 3761 ptr_idxs += olengths_idxs[i]; 3762 ptr_vals += olengths_vals[i]; 3763 } 3764 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3765 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3766 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3767 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3768 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3769 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3770 3771 #if 0 3772 if (!restrict_comm) { /* check */ 3773 Vec lvec,rvec; 3774 PetscReal infty_error; 3775 3776 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3777 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3778 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3779 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3780 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3781 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3782 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3783 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3784 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3785 } 3786 #endif 3787 3788 /* assemble new additional is (if any) */ 3789 if (nis) { 3790 PetscInt **temp_idxs,*count_is,j,psum; 3791 3792 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3793 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3794 ptr_idxs = recv_buffer_idxs_is; 3795 psum = 0; 3796 for (i=0;i<n_recvs;i++) { 3797 for (j=0;j<nis;j++) { 3798 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3799 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3800 psum += plen; 3801 ptr_idxs += plen+1; /* shift pointer to received data */ 3802 } 3803 } 3804 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3805 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3806 for (i=1;i<nis;i++) { 3807 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3808 } 3809 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3810 ptr_idxs = recv_buffer_idxs_is; 3811 for (i=0;i<n_recvs;i++) { 3812 for (j=0;j<nis;j++) { 3813 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3814 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3815 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3816 ptr_idxs += plen+1; /* shift pointer to received data */ 3817 } 3818 } 3819 for (i=0;i<nis;i++) { 3820 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3821 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3822 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3823 } 3824 ierr = PetscFree(count_is);CHKERRQ(ierr); 3825 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3826 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3827 } 3828 /* free workspace */ 3829 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3830 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3831 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3832 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3833 if (isdense) { 3834 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3835 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3836 } else { 3837 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3838 } 3839 if (nis) { 3840 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3841 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3842 } 3843 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3844 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3845 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3846 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3847 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3848 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3849 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3850 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3851 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3852 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3853 ierr = PetscFree(onodes);CHKERRQ(ierr); 3854 if (nis) { 3855 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3856 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3857 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3858 } 3859 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3860 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3861 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3862 for (i=0;i<nis;i++) { 3863 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3864 } 3865 } 3866 PetscFunctionReturn(0); 3867 } 3868 3869 /* temporary hack into ksp private data structure */ 3870 #include <petsc-private/kspimpl.h> 3871 3872 #undef __FUNCT__ 3873 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3874 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3875 { 3876 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3877 PC_IS *pcis = (PC_IS*)pc->data; 3878 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3879 MatNullSpace CoarseNullSpace=NULL; 3880 ISLocalToGlobalMapping coarse_islg; 3881 IS coarse_is,*isarray; 3882 PetscInt i,im_active=-1,active_procs=-1; 3883 PetscInt nis,nisdofs,nisneu; 3884 PC pc_temp; 3885 PCType coarse_pc_type; 3886 KSPType coarse_ksp_type; 3887 PetscBool multilevel_requested,multilevel_allowed; 3888 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3889 Mat t_coarse_mat_is; 3890 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3891 PetscMPIInt all_procs; 3892 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3893 PetscBool compute_vecs = PETSC_FALSE; 3894 PetscScalar *array; 3895 PetscErrorCode ierr; 3896 3897 PetscFunctionBegin; 3898 /* Assign global numbering to coarse dofs */ 3899 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3900 compute_vecs = PETSC_TRUE; 3901 PetscInt ocoarse_size; 3902 ocoarse_size = pcbddc->coarse_size; 3903 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3904 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3905 /* see if we can avoid some work */ 3906 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3907 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3908 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3909 coarse_reuse = PETSC_FALSE; 3910 } else { /* we can safely reuse already computed coarse matrix */ 3911 coarse_reuse = PETSC_TRUE; 3912 } 3913 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3914 coarse_reuse = PETSC_FALSE; 3915 } 3916 /* reset any subassembling information */ 3917 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3918 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3919 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3920 coarse_reuse = PETSC_TRUE; 3921 } 3922 3923 /* count "active" (i.e. with positive local size) and "void" processes */ 3924 im_active = !!(pcis->n); 3925 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3926 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3927 void_procs = all_procs-active_procs; 3928 csin_type_simple = PETSC_TRUE; 3929 redist = PETSC_FALSE; 3930 if (pcbddc->current_level && void_procs) { 3931 csin_ml = PETSC_TRUE; 3932 ncoarse_ml = void_procs; 3933 csin_ds = PETSC_TRUE; 3934 ncoarse_ds = void_procs; 3935 } else { 3936 csin_ml = PETSC_FALSE; 3937 ncoarse_ml = all_procs; 3938 if (void_procs) { 3939 csin_ds = PETSC_TRUE; 3940 ncoarse_ds = void_procs; 3941 csin_type_simple = PETSC_FALSE; 3942 } else { 3943 if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) { 3944 csin_ds = PETSC_TRUE; 3945 ncoarse_ds = pcbddc->redistribute_coarse; 3946 redist = PETSC_TRUE; 3947 } else { 3948 csin_ds = PETSC_FALSE; 3949 ncoarse_ds = all_procs; 3950 } 3951 } 3952 } 3953 3954 /* 3955 test if we can go multilevel: three conditions must be satisfied: 3956 - we have not exceeded the number of levels requested 3957 - we can actually subassemble the active processes 3958 - we can find a suitable number of MPI processes where we can place the subassembled problem 3959 */ 3960 multilevel_allowed = PETSC_FALSE; 3961 multilevel_requested = PETSC_FALSE; 3962 if (pcbddc->current_level < pcbddc->max_levels) { 3963 multilevel_requested = PETSC_TRUE; 3964 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3965 multilevel_allowed = PETSC_FALSE; 3966 } else { 3967 multilevel_allowed = PETSC_TRUE; 3968 } 3969 } 3970 /* determine number of process partecipating to coarse solver */ 3971 if (multilevel_allowed) { 3972 ncoarse = ncoarse_ml; 3973 csin = csin_ml; 3974 } else { 3975 ncoarse = ncoarse_ds; 3976 csin = csin_ds; 3977 } 3978 3979 /* creates temporary l2gmap and IS for coarse indexes */ 3980 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3981 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3982 3983 /* creates temporary MATIS object for coarse matrix */ 3984 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3985 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3986 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3987 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3988 #if 0 3989 { 3990 PetscViewer viewer; 3991 char filename[256]; 3992 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3993 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3994 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3995 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3996 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3997 } 3998 #endif 3999 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4000 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4001 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4002 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4003 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4004 4005 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4006 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4007 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4008 const PetscInt *idxs; 4009 ISLocalToGlobalMapping tmap; 4010 4011 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4012 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4013 /* allocate space for temporary storage */ 4014 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4015 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4016 /* allocate for IS array */ 4017 nisdofs = pcbddc->n_ISForDofsLocal; 4018 nisneu = !!pcbddc->NeumannBoundariesLocal; 4019 nis = nisdofs + nisneu; 4020 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4021 /* dofs splitting */ 4022 for (i=0;i<nisdofs;i++) { 4023 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4024 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4025 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4026 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4027 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4028 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4029 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4030 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4031 } 4032 /* neumann boundaries */ 4033 if (pcbddc->NeumannBoundariesLocal) { 4034 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4035 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4036 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4037 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4038 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4039 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4040 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4041 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4042 } 4043 /* free memory */ 4044 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4045 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4046 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4047 } else { 4048 nis = 0; 4049 nisdofs = 0; 4050 nisneu = 0; 4051 isarray = NULL; 4052 } 4053 /* destroy no longer needed map */ 4054 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4055 4056 /* restrict on coarse candidates (if needed) */ 4057 coarse_mat_is = NULL; 4058 if (csin) { 4059 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4060 if (redist) { 4061 PetscMPIInt rank; 4062 PetscInt spc,n_spc_p1,dest[1]; 4063 4064 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4065 spc = all_procs/pcbddc->redistribute_coarse; 4066 n_spc_p1 = all_procs%pcbddc->redistribute_coarse; 4067 if (rank > n_spc_p1*(spc+1)-1) { 4068 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4069 } else { 4070 dest[0] = rank/(spc+1); 4071 } 4072 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4073 } else { 4074 PetscInt j,tissize,*nisindices; 4075 PetscInt *coarse_candidates; 4076 const PetscInt* tisindices; 4077 /* get coarse candidates' ranks in pc communicator */ 4078 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4079 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4080 for (i=0,j=0;i<all_procs;i++) { 4081 if (!coarse_candidates[i]) { 4082 coarse_candidates[j]=i; 4083 j++; 4084 } 4085 } 4086 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 4087 /* get a suitable subassembling pattern */ 4088 if (csin_type_simple) { 4089 PetscMPIInt rank; 4090 PetscInt issize,isidx; 4091 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4092 if (im_active) { 4093 issize = 1; 4094 isidx = (PetscInt)rank; 4095 } else { 4096 issize = 0; 4097 isidx = -1; 4098 } 4099 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4100 } else { 4101 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4102 } 4103 if (pcbddc->dbg_flag) { 4104 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4105 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4106 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4107 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4108 for (i=0;i<j;i++) { 4109 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4110 } 4111 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4112 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4113 } 4114 /* shift the pattern on coarse candidates */ 4115 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4116 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4117 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4118 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4119 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4120 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4121 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4122 } 4123 } 4124 if (pcbddc->dbg_flag) { 4125 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4126 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4127 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4128 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4129 } 4130 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4131 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4132 } else { 4133 if (pcbddc->dbg_flag) { 4134 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4135 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4136 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4137 } 4138 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4139 coarse_mat_is = t_coarse_mat_is; 4140 } 4141 4142 /* create local to global scatters for coarse problem */ 4143 if (compute_vecs) { 4144 PetscInt lrows; 4145 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4146 if (coarse_mat_is) { 4147 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4148 } else { 4149 lrows = 0; 4150 } 4151 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4152 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4153 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4154 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4155 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4156 } 4157 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4158 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4159 4160 /* set defaults for coarse KSP and PC */ 4161 if (multilevel_allowed) { 4162 coarse_ksp_type = KSPRICHARDSON; 4163 coarse_pc_type = PCBDDC; 4164 } else { 4165 coarse_ksp_type = KSPPREONLY; 4166 coarse_pc_type = PCREDUNDANT; 4167 } 4168 4169 /* print some info if requested */ 4170 if (pcbddc->dbg_flag) { 4171 if (!multilevel_allowed) { 4172 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4173 if (multilevel_requested) { 4174 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4175 } else if (pcbddc->max_levels) { 4176 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4177 } 4178 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4179 } 4180 } 4181 4182 /* create the coarse KSP object only once with defaults */ 4183 if (coarse_mat_is) { 4184 MatReuse coarse_mat_reuse; 4185 PetscViewer dbg_viewer = NULL; 4186 if (pcbddc->dbg_flag) { 4187 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4188 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4189 } 4190 if (!pcbddc->coarse_ksp) { 4191 char prefix[256],str_level[16]; 4192 size_t len; 4193 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4194 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4195 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4196 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4197 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4198 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4199 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4200 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4201 /* prefix */ 4202 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4203 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4204 if (!pcbddc->current_level) { 4205 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4206 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4207 } else { 4208 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4209 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4210 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4211 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4212 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4213 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4214 } 4215 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4216 /* allow user customization */ 4217 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4218 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4219 } 4220 4221 /* get some info after set from options */ 4222 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4223 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4224 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4225 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4226 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4227 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4228 isbddc = PETSC_FALSE; 4229 } 4230 if (isredundant) { 4231 KSP inner_ksp; 4232 PC inner_pc; 4233 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4234 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4235 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4236 } 4237 4238 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4239 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4240 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4241 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4242 if (nisdofs) { 4243 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4244 for (i=0;i<nisdofs;i++) { 4245 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4246 } 4247 } 4248 if (nisneu) { 4249 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4250 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4251 } 4252 4253 /* assemble coarse matrix */ 4254 if (coarse_reuse) { 4255 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4256 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4257 coarse_mat_reuse = MAT_REUSE_MATRIX; 4258 } else { 4259 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4260 } 4261 if (isbddc || isnn) { 4262 if (pcbddc->coarsening_ratio > 1) { 4263 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4264 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4265 if (pcbddc->dbg_flag) { 4266 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4267 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4268 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4269 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4270 } 4271 } 4272 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4273 } else { 4274 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4275 coarse_mat = coarse_mat_is; 4276 } 4277 } else { 4278 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4279 } 4280 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4281 4282 /* propagate symmetry info to coarse matrix */ 4283 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4284 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4285 4286 /* set operators */ 4287 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4288 if (pcbddc->dbg_flag) { 4289 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4290 } 4291 } else { /* processes non partecipating to coarse solver (if any) */ 4292 coarse_mat = 0; 4293 } 4294 ierr = PetscFree(isarray);CHKERRQ(ierr); 4295 #if 0 4296 { 4297 PetscViewer viewer; 4298 char filename[256]; 4299 sprintf(filename,"coarse_mat.m"); 4300 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4301 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4302 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4303 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4304 } 4305 #endif 4306 4307 /* Compute coarse null space (special handling by BDDC only) */ 4308 if (pcbddc->NullSpace) { 4309 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4310 } 4311 4312 if (pcbddc->coarse_ksp) { 4313 Vec crhs,csol; 4314 PetscBool ispreonly; 4315 if (CoarseNullSpace) { 4316 if (isbddc) { 4317 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4318 } else { 4319 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4320 } 4321 } 4322 /* setup coarse ksp */ 4323 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4324 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4325 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4326 /* hack */ 4327 if (!csol) { 4328 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4329 } 4330 if (!crhs) { 4331 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4332 } 4333 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4334 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4335 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4336 KSP check_ksp; 4337 KSPType check_ksp_type; 4338 PC check_pc; 4339 Vec check_vec,coarse_vec; 4340 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4341 PetscInt its; 4342 PetscBool compute_eigs; 4343 PetscReal *eigs_r,*eigs_c; 4344 PetscInt neigs; 4345 const char *prefix; 4346 4347 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4348 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4349 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4350 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4351 if (ispreonly) { 4352 check_ksp_type = KSPPREONLY; 4353 compute_eigs = PETSC_FALSE; 4354 } else { 4355 check_ksp_type = KSPGMRES; 4356 compute_eigs = PETSC_TRUE; 4357 } 4358 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4359 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4360 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4361 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4362 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4363 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4364 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4365 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4366 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4367 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4368 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4369 /* create random vec */ 4370 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4371 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4372 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4373 if (CoarseNullSpace) { 4374 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4375 } 4376 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4377 /* solve coarse problem */ 4378 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4379 if (CoarseNullSpace) { 4380 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4381 } 4382 /* set eigenvalue estimation if preonly has not been requested */ 4383 if (compute_eigs) { 4384 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4385 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4386 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4387 lambda_max = eigs_r[neigs-1]; 4388 lambda_min = eigs_r[0]; 4389 if (pcbddc->use_coarse_estimates) { 4390 if (lambda_max>lambda_min) { 4391 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4392 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4393 } 4394 } 4395 } 4396 4397 /* check coarse problem residual error */ 4398 if (pcbddc->dbg_flag) { 4399 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4400 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4401 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4402 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4403 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4404 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4405 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4406 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4407 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4408 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4409 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4410 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4411 if (compute_eigs) { 4412 PetscReal lambda_max_s,lambda_min_s; 4413 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4414 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4415 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4416 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4417 for (i=0;i<neigs;i++) { 4418 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4419 } 4420 } 4421 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4422 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4423 } 4424 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4425 if (compute_eigs) { 4426 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4427 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4428 } 4429 } 4430 } 4431 /* print additional info */ 4432 if (pcbddc->dbg_flag) { 4433 /* waits until all processes reaches this point */ 4434 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4435 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4436 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4437 } 4438 4439 /* free memory */ 4440 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4441 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4442 PetscFunctionReturn(0); 4443 } 4444 4445 #undef __FUNCT__ 4446 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4447 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4448 { 4449 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4450 PC_IS* pcis = (PC_IS*)pc->data; 4451 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4452 PetscInt i,coarse_size; 4453 PetscInt *local_primal_indices; 4454 PetscErrorCode ierr; 4455 4456 PetscFunctionBegin; 4457 /* Compute global number of coarse dofs */ 4458 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4459 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4460 } 4461 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4462 4463 /* check numbering */ 4464 if (pcbddc->dbg_flag) { 4465 PetscScalar coarsesum,*array; 4466 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4467 4468 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4469 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4470 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4471 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4472 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4473 for (i=0;i<pcbddc->local_primal_size;i++) { 4474 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4475 } 4476 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4477 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4478 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4479 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4480 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4481 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4482 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4483 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4484 for (i=0;i<pcis->n;i++) { 4485 if (array[i] == 1.0) { 4486 set_error = PETSC_TRUE; 4487 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4488 } 4489 } 4490 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4491 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4492 for (i=0;i<pcis->n;i++) { 4493 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4494 } 4495 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4496 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4497 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4498 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4499 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4500 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4501 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4502 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4503 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4504 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4505 for (i=0;i<pcbddc->local_primal_size;i++) { 4506 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4507 } 4508 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4509 } 4510 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4511 if (set_error_reduced) { 4512 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4513 } 4514 } 4515 /* get back data */ 4516 *coarse_size_n = coarse_size; 4517 *local_primal_indices_n = local_primal_indices; 4518 PetscFunctionReturn(0); 4519 } 4520 4521 #undef __FUNCT__ 4522 #define __FUNCT__ "PCBDDCGlobalToLocal" 4523 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4524 { 4525 IS localis_t; 4526 PetscInt i,lsize,*idxs,n; 4527 PetscScalar *vals; 4528 PetscErrorCode ierr; 4529 4530 PetscFunctionBegin; 4531 /* get indices in local ordering exploiting local to global map */ 4532 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4533 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4534 for (i=0;i<lsize;i++) vals[i] = 1.0; 4535 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4536 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4537 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4538 if (idxs) { /* multilevel guard */ 4539 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4540 } 4541 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4542 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4543 ierr = PetscFree(vals);CHKERRQ(ierr); 4544 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4545 /* now compute set in local ordering */ 4546 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4547 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4548 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4549 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4550 for (i=0,lsize=0;i<n;i++) { 4551 if (PetscRealPart(vals[i]) > 0.5) { 4552 lsize++; 4553 } 4554 } 4555 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4556 for (i=0,lsize=0;i<n;i++) { 4557 if (PetscRealPart(vals[i]) > 0.5) { 4558 idxs[lsize++] = i; 4559 } 4560 } 4561 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4562 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4563 *localis = localis_t; 4564 PetscFunctionReturn(0); 4565 } 4566 4567 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4568 #undef __FUNCT__ 4569 #define __FUNCT__ "PCBDDCMatMult_Private" 4570 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4571 { 4572 PCBDDCChange_ctx change_ctx; 4573 PetscErrorCode ierr; 4574 4575 PetscFunctionBegin; 4576 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4577 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4578 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4579 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4580 PetscFunctionReturn(0); 4581 } 4582 4583 #undef __FUNCT__ 4584 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4585 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4586 { 4587 PCBDDCChange_ctx change_ctx; 4588 PetscErrorCode ierr; 4589 4590 PetscFunctionBegin; 4591 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4592 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4593 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4594 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4595 PetscFunctionReturn(0); 4596 } 4597 4598 #undef __FUNCT__ 4599 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4600 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4601 { 4602 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4603 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4604 PetscInt *used_xadj,*used_adjncy; 4605 PetscBool free_used_adj; 4606 PetscErrorCode ierr; 4607 4608 PetscFunctionBegin; 4609 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4610 free_used_adj = PETSC_FALSE; 4611 if (pcbddc->sub_schurs_layers == -1) { 4612 used_xadj = NULL; 4613 used_adjncy = NULL; 4614 } else { 4615 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4616 used_xadj = pcbddc->mat_graph->xadj; 4617 used_adjncy = pcbddc->mat_graph->adjncy; 4618 } else if (pcbddc->computed_rowadj) { 4619 used_xadj = pcbddc->mat_graph->xadj; 4620 used_adjncy = pcbddc->mat_graph->adjncy; 4621 } else { 4622 Mat mat_adj; 4623 PetscBool flg_row=PETSC_TRUE; 4624 const PetscInt *xadj,*adjncy; 4625 PetscInt nvtxs; 4626 4627 ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 4628 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4629 if (!flg_row) { 4630 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 4631 } 4632 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4633 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4634 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4635 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4636 if (!flg_row) { 4637 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 4638 } 4639 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 4640 free_used_adj = PETSC_TRUE; 4641 } 4642 } 4643 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4644 4645 /* free adjacency */ 4646 if (free_used_adj) { 4647 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4648 } 4649 PetscFunctionReturn(0); 4650 } 4651 4652 #undef __FUNCT__ 4653 #define __FUNCT__ "PCBDDCInitSubSchurs" 4654 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4655 { 4656 PC_IS *pcis=(PC_IS*)pc->data; 4657 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4658 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4659 PCBDDCGraph graph; 4660 Mat S_j; 4661 PetscErrorCode ierr; 4662 4663 PetscFunctionBegin; 4664 /* attach interface graph for determining subsets */ 4665 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4666 IS verticesIS; 4667 4668 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4669 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4670 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4671 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4672 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4673 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4674 /* 4675 if (pcbddc->dbg_flag) { 4676 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4677 } 4678 */ 4679 } else { 4680 graph = pcbddc->mat_graph; 4681 } 4682 4683 /* Create Schur complement matrix */ 4684 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4685 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4686 4687 /* sub_schurs init */ 4688 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr); 4689 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4690 /* free graph struct */ 4691 if (pcbddc->sub_schurs_rebuild) { 4692 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4693 } 4694 PetscFunctionReturn(0); 4695 } 4696