1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs,maxneigs_r,cumsubs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscBool symmetric; 26 PetscErrorCode ierr; 27 28 PetscFunctionBegin; 29 if (!sub_schurs->use_mumps) { 30 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 31 } 32 ierr = MatIsSymmetric(sub_schurs->A,0.0,&symmetric);CHKERRQ(ierr); 33 if (!symmetric) { 34 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented"); 35 } 36 37 /* max size of subsets */ 38 mss = 0; 39 for (i=0;i<sub_schurs->n_subs;i++) { 40 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 41 PetscInt subset_size; 42 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 43 mss = PetscMax(mss,subset_size); 44 } 45 } 46 47 /* min/max and threshold */ 48 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 49 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 1; 50 nmax = PetscMax(nmin,nmax); 51 if (pcbddc->adaptive_threshold > 1.0) { 52 thresh = 1.0/pcbddc->adaptive_threshold; 53 } else { 54 thresh = 1.0; 55 } 56 57 /* allocate lapack workspace */ 58 cum = cum2 = 0; 59 maxneigs = 0; 60 for (i=0;i<sub_schurs->n_subs;i++) { 61 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 62 PetscInt n,subset_size; 63 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 64 n = PetscMin(subset_size,nmax); 65 n = PetscMax(n,nmin); 66 cum += subset_size*n; 67 cum2 += n; 68 maxneigs = PetscMax(maxneigs,n); 69 } 70 } 71 72 if (mss) { 73 if (symmetric) { 74 PetscBLASInt B_itype = 1; 75 PetscBLASInt B_N = mss; 76 PetscScalar zero = 0.0; 77 PetscScalar eps = 0.0; /* dlamch? */ 78 79 B_lwork = -1; 80 S = NULL; 81 St = NULL; 82 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 83 #if defined(PETSC_USE_COMPLEX) 84 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 85 #else 86 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 87 #endif 88 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 89 ierr = PetscFPTrapPop();CHKERRQ(ierr); 90 } else { 91 /* TODO */ 92 } 93 } else { 94 lwork = 0; 95 } 96 97 nv = 0; 98 if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */ 99 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 100 } 101 ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr); 102 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 103 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 104 #if defined(PETSC_USE_COMPLEX) 105 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 106 #endif 107 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 108 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 109 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 110 nv+cum,&pcbddc->adaptive_constraints_idxs, 111 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 112 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 113 114 maxneigs = 0; 115 cum = cum2 = cumarray = 0; 116 if (sub_schurs->is_Ej_com) { 117 const PetscInt *idxs; 118 119 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 120 for (cum=0;cum<nv;cum++) { 121 pcbddc->adaptive_constraints_n[cum] = 1; 122 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 123 pcbddc->adaptive_constraints_ptrs[cum] = cum; 124 pcbddc->adaptive_constraints_data[cum] = 1.0; 125 } 126 cum2 = cum; 127 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 128 } 129 130 if (mss) { /* multilevel */ 131 if (pcbddc->use_deluxe_scaling) { 132 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 133 } else { 134 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 135 } 136 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 137 } 138 139 cumsubs = 0; 140 for (i=0;i<sub_schurs->n_subs;i++) { 141 PetscInt j,subset_size; 142 143 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 144 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 145 const PetscInt *idxs; 146 PetscScalar one = 1.0,zero=0.0; 147 PetscBLASInt B_N; 148 149 /* S should be copied since we need it for deluxe scaling */ 150 if (symmetric) { 151 PetscInt j; 152 for (j=0;j<subset_size;j++) { 153 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 154 } 155 for (j=0;j<subset_size;j++) { 156 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 157 } 158 } else { 159 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 160 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 161 } 162 /* is always this the right matrix? */ 163 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 164 165 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 166 /* St = Starray+cumarray; */ 167 168 if (symmetric) { 169 PetscBLASInt B_itype = 1; 170 PetscBLASInt B_IL = 1, B_IU; 171 PetscScalar eps = -1.0; /* dlamch? */ 172 173 /* ask for eigenvalues lower than thresh */ 174 PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs); 175 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 176 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 177 #if defined(PETSC_USE_COMPLEX) 178 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 179 #else 180 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 181 #endif 182 ierr = PetscFPTrapPop();CHKERRQ(ierr); 183 if (B_ierr) { 184 if (B_ierr < 0 ) { 185 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 186 } else if (B_ierr <= B_N) { 187 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 188 } else { 189 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d principal minor not posdef",(int)B_ierr-B_N-1); 190 } 191 } 192 193 if (B_neigs > nmax) { 194 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, more than required %d.\n",PetscGlobalRank,B_neigs,nmax); 195 B_neigs = nmax; 196 } 197 198 if (B_neigs < nmin) { 199 PetscBLASInt B_neigs2; 200 201 B_IL = B_neigs + 1; 202 ierr = PetscBLASIntCast(nmin,&B_IU);CHKERRQ(ierr); 203 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, less than required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU); 204 if (symmetric) { 205 PetscInt j; 206 for (j=0;j<subset_size;j++) { 207 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 208 } 209 for (j=0;j<subset_size;j++) { 210 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 211 } 212 } else { 213 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 214 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 215 } 216 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 217 #if defined(PETSC_USE_COMPLEX) 218 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 219 #else 220 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 221 #endif 222 ierr = PetscFPTrapPop();CHKERRQ(ierr); 223 B_neigs += B_neigs2; 224 } 225 if (B_ierr) { 226 if (B_ierr < 0 ) { 227 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 228 } else if (B_ierr <= B_N) { 229 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 230 } else { 231 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d principal minor not posdef",(int)B_ierr-B_N-1); 232 } 233 } 234 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Got %d eigs\n",PetscGlobalRank,B_neigs); 235 for (j=0;j<B_neigs;j++) { 236 if (eigs[j] == 0.0) { 237 PetscPrintf(PETSC_COMM_SELF,"[%d] Inf\n",PetscGlobalRank); 238 } else { 239 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.6e\n",PetscGlobalRank,1.0/eigs[j]); 240 } 241 } 242 } else { 243 /* TODO */ 244 } 245 maxneigs = PetscMax(B_neigs,maxneigs); 246 pcbddc->adaptive_constraints_n[cumsubs+nv] = B_neigs; 247 cumsubs++; 248 249 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 250 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N)); 251 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 252 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 253 for (j=0;j<B_neigs;j++) { 254 #if 0 255 { 256 PetscBLASInt Blas_N,Blas_one = 1.0; 257 PetscScalar norm; 258 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 259 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 260 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 261 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 262 } else { 263 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 264 } 265 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 266 } 267 #endif 268 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 269 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 270 cum2 += subset_size; 271 } 272 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 273 } 274 /* shift for next computation */ 275 cumarray += subset_size*subset_size; 276 } 277 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 278 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 279 280 if (mss) { 281 if (pcbddc->use_deluxe_scaling) { 282 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 283 } else { 284 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 285 } 286 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 287 } 288 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 289 #if defined(PETSC_USE_COMPLEX) 290 ierr = PetscFree(rwork);CHKERRQ(ierr); 291 #endif 292 if (pcbddc->dbg_flag) { 293 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 294 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 295 } 296 PetscFunctionReturn(0); 297 } 298 299 #undef __FUNCT__ 300 #define __FUNCT__ "PCBDDCSetUpSolvers" 301 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 302 { 303 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 304 PetscScalar *coarse_submat_vals; 305 PetscErrorCode ierr; 306 307 PetscFunctionBegin; 308 /* Setup local scatters R_to_B and (optionally) R_to_D */ 309 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 310 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 311 312 /* Setup local neumann solver ksp_R */ 313 /* PCBDDCSetUpLocalScatters should be called first! */ 314 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 315 316 /* Change global null space passed in by the user if change of basis has been requested */ 317 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 318 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 319 } 320 321 /* 322 Setup local correction and local part of coarse basis. 323 Gives back the dense local part of the coarse matrix in column major ordering 324 */ 325 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 326 327 /* Compute total number of coarse nodes and setup coarse solver */ 328 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 329 330 /* free */ 331 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 332 PetscFunctionReturn(0); 333 } 334 335 #undef __FUNCT__ 336 #define __FUNCT__ "PCBDDCResetCustomization" 337 PetscErrorCode PCBDDCResetCustomization(PC pc) 338 { 339 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 340 PetscErrorCode ierr; 341 342 PetscFunctionBegin; 343 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 344 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 345 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 346 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 347 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 348 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 349 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 350 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 351 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 352 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 353 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 354 PetscFunctionReturn(0); 355 } 356 357 #undef __FUNCT__ 358 #define __FUNCT__ "PCBDDCResetTopography" 359 PetscErrorCode PCBDDCResetTopography(PC pc) 360 { 361 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 362 PetscErrorCode ierr; 363 364 PetscFunctionBegin; 365 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 366 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 367 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 368 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 369 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 370 PetscFunctionReturn(0); 371 } 372 373 #undef __FUNCT__ 374 #define __FUNCT__ "PCBDDCResetSolvers" 375 PetscErrorCode PCBDDCResetSolvers(PC pc) 376 { 377 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 378 PetscScalar *array; 379 PetscErrorCode ierr; 380 381 PetscFunctionBegin; 382 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 383 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 384 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 385 ierr = PetscFree(array);CHKERRQ(ierr); 386 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 387 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 388 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 389 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 390 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 391 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 392 if (pcbddc->local_auxmat2) { 393 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 394 ierr = PetscFree(array);CHKERRQ(ierr); 395 } 396 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 397 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 398 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 399 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 400 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 401 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 402 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 403 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 404 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 405 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 406 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 407 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 408 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 409 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 410 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 411 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 412 PetscFunctionReturn(0); 413 } 414 415 #undef __FUNCT__ 416 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 417 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 418 { 419 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 420 PC_IS *pcis = (PC_IS*)pc->data; 421 VecType impVecType; 422 PetscInt n_constraints,n_R,old_size; 423 PetscErrorCode ierr; 424 425 PetscFunctionBegin; 426 if (!pcbddc->ConstraintMatrix) { 427 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 428 } 429 /* get sizes */ 430 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 431 n_R = pcis->n-pcbddc->n_actual_vertices; 432 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 433 /* local work vectors (try to avoid unneeded work)*/ 434 /* R nodes */ 435 old_size = -1; 436 if (pcbddc->vec1_R) { 437 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 438 } 439 if (n_R != old_size) { 440 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 441 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 442 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 443 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 444 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 445 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 446 } 447 /* local primal dofs */ 448 old_size = -1; 449 if (pcbddc->vec1_P) { 450 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 451 } 452 if (pcbddc->local_primal_size != old_size) { 453 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 454 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 455 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 456 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 457 } 458 /* local explicit constraints */ 459 old_size = -1; 460 if (pcbddc->vec1_C) { 461 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 462 } 463 if (n_constraints && n_constraints != old_size) { 464 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 465 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 466 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 467 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 468 } 469 PetscFunctionReturn(0); 470 } 471 472 #undef __FUNCT__ 473 #define __FUNCT__ "PCBDDCSetUpCorrection" 474 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 475 { 476 PetscErrorCode ierr; 477 /* pointers to pcis and pcbddc */ 478 PC_IS* pcis = (PC_IS*)pc->data; 479 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 480 /* submatrices of local problem */ 481 Mat A_RV,A_VR,A_VV; 482 /* submatrices of local coarse problem */ 483 Mat S_VV,S_CV,S_VC,S_CC; 484 /* working matrices */ 485 Mat C_CR; 486 /* additional working stuff */ 487 PC pc_R; 488 Mat F; 489 PetscBool isLU,isCHOL,isILU; 490 491 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 492 PetscScalar *work; 493 PetscInt *idx_V_B; 494 PetscInt n,n_vertices,n_constraints; 495 PetscInt i,n_R,n_D,n_B; 496 PetscBool unsymmetric_check; 497 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 498 MatType impMatType; 499 /* some shortcuts to scalars */ 500 PetscScalar one=1.0,m_one=-1.0; 501 502 PetscFunctionBegin; 503 /* get number of vertices (corners plus constraints with change of basis) 504 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 505 n_vertices = pcbddc->n_actual_vertices; 506 n_constraints = pcbddc->local_primal_size-n_vertices; 507 /* Set Non-overlapping dimensions */ 508 n_B = pcis->n_B; n_D = pcis->n - n_B; 509 n_R = pcis->n-n_vertices; 510 511 /* Set types for local objects needed by BDDC precondtioner */ 512 impMatType = MATSEQDENSE; 513 514 /* vertices in boundary numbering */ 515 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 516 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 517 if (i != n_vertices) { 518 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 519 } 520 521 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 522 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 523 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 524 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 525 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 526 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 527 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 528 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 529 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 530 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 531 532 unsymmetric_check = PETSC_FALSE; 533 /* allocate workspace */ 534 n = 0; 535 if (n_constraints) { 536 n += n_R*n_constraints; 537 } 538 if (n_vertices) { 539 n = PetscMax(2*n_R*n_vertices,n); 540 } 541 if (!pcbddc->issym) { 542 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 543 unsymmetric_check = PETSC_TRUE; 544 } 545 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 546 547 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 548 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 549 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 550 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 551 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 552 if (isLU || isILU || isCHOL) { 553 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 554 } else { 555 F = NULL; 556 } 557 558 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 559 if (n_constraints) { 560 Mat M1,M2,M3; 561 IS is_aux; 562 /* see if we can save some allocations */ 563 if (pcbddc->local_auxmat2) { 564 PetscInt on_R,on_constraints; 565 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 566 if (on_R != n_R || on_constraints != n_constraints) { 567 PetscScalar *marray; 568 569 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 570 ierr = PetscFree(marray);CHKERRQ(ierr); 571 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 572 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 573 } 574 } 575 /* auxiliary matrices */ 576 if (!pcbddc->local_auxmat2) { 577 PetscScalar *marray; 578 579 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 580 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 581 marray += n_R*n_constraints; 582 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 583 } 584 585 /* Extract constraints on R nodes: C_{CR} */ 586 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 587 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 588 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 589 590 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 591 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 592 for (i=0;i<n_constraints;i++) { 593 const PetscScalar *row_cmat_values; 594 const PetscInt *row_cmat_indices; 595 PetscInt size_of_constraint,j; 596 597 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 598 for (j=0;j<size_of_constraint;j++) { 599 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 600 } 601 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 602 } 603 if (F) { 604 Mat B; 605 606 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 607 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 608 ierr = MatDestroy(&B);CHKERRQ(ierr); 609 } else { 610 PetscScalar *xarray; 611 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 612 for (i=0;i<n_constraints;i++) { 613 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 614 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 615 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 616 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 617 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 618 } 619 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 620 } 621 622 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 623 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 624 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 625 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 626 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 627 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 628 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 629 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 630 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 631 ierr = MatDestroy(&M2);CHKERRQ(ierr); 632 ierr = MatDestroy(&M3);CHKERRQ(ierr); 633 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 634 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 635 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 636 ierr = MatDestroy(&M1);CHKERRQ(ierr); 637 } 638 639 /* Get submatrices from subdomain matrix */ 640 if (n_vertices) { 641 Mat newmat; 642 IS is_aux; 643 PetscInt ibs,mbs; 644 PetscBool issbaij; 645 646 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 647 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 648 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 649 if (ibs != mbs) { /* need to convert to SEQAIJ */ 650 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 651 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 652 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 653 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 654 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 655 } else { 656 /* this is safe */ 657 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 658 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 659 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 660 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 661 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 662 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 663 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 664 } else { 665 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 666 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 667 } 668 } 669 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 670 } 671 672 /* Matrix of coarse basis functions (local) */ 673 if (pcbddc->coarse_phi_B) { 674 PetscInt on_B,on_primal,on_D=n_D; 675 if (pcbddc->coarse_phi_D) { 676 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 677 } 678 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 679 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 680 PetscScalar *marray; 681 682 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 683 ierr = PetscFree(marray);CHKERRQ(ierr); 684 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 685 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 686 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 687 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 688 } 689 } 690 691 if (!pcbddc->coarse_phi_B) { 692 PetscScalar *marray; 693 694 n = n_B*pcbddc->local_primal_size; 695 if (pcbddc->switch_static || pcbddc->dbg_flag) { 696 n += n_D*pcbddc->local_primal_size; 697 } 698 if (!pcbddc->issym) { 699 n *= 2; 700 } 701 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 702 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 703 n = n_B*pcbddc->local_primal_size; 704 if (pcbddc->switch_static || pcbddc->dbg_flag) { 705 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 706 n += n_D*pcbddc->local_primal_size; 707 } 708 if (!pcbddc->issym) { 709 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 710 if (pcbddc->switch_static) { 711 n = n_B*pcbddc->local_primal_size; 712 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 713 } 714 } else { 715 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 716 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 717 if (pcbddc->switch_static) { 718 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 719 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 720 } 721 } 722 } 723 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 724 /* vertices */ 725 if (n_vertices) { 726 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 727 PetscScalar *x,*y; 728 PetscBLASInt B_N,B_one = 1; 729 730 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 731 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 732 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 733 if (F) { 734 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 735 } else { 736 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 737 for (i=0;i<n_vertices;i++) { 738 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 739 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 740 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 741 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 742 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 743 } 744 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 745 } 746 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 747 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 748 if (n_constraints) { 749 Mat B; 750 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 751 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 752 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 753 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 754 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 755 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 756 ierr = MatDestroy(&B);CHKERRQ(ierr); 757 } 758 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 759 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 760 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 761 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 762 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 763 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 764 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 765 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 766 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 767 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 768 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 769 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 770 /* coarse basis functions */ 771 for (i=0;i<n_vertices;i++) { 772 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 773 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 774 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 775 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 776 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 777 y[n_B*i+idx_V_B[i]] = 1.0; 778 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 779 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 780 781 if (pcbddc->switch_static || pcbddc->dbg_flag) { 782 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 783 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 784 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 785 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 786 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 787 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 788 } 789 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 790 } 791 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 792 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 793 } 794 795 if (n_constraints) { 796 Mat B; 797 798 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 799 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 800 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 801 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 802 if (n_vertices) { 803 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 804 } 805 ierr = MatDestroy(&B);CHKERRQ(ierr); 806 /* coarse basis functions */ 807 for (i=0;i<n_constraints;i++) { 808 PetscScalar *y; 809 810 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 811 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 812 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 813 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 814 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 815 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 816 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 817 818 if (pcbddc->switch_static || pcbddc->dbg_flag) { 819 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 820 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 821 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 822 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 823 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 824 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 825 } 826 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 827 } 828 } 829 830 /* compute other basis functions for non-symmetric problems */ 831 if (!pcbddc->issym) { 832 Mat B,X; 833 834 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 835 836 if (n_constraints) { 837 Mat S_VCT,S_CCT,B_C; 838 839 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 840 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 841 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 842 if (n_vertices) { 843 Mat B_V; 844 845 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 846 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 847 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 848 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 849 } 850 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 851 } 852 if (n_vertices) { 853 Mat A_VRT; 854 PetscBLASInt B_N,B_one = 1; 855 856 if (!n_constraints) { /* if there are no constraints, reset work */ 857 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 858 } 859 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 860 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 861 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 862 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 863 } 864 865 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 866 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 867 for (i=0;i<pcbddc->local_primal_size;i++) { 868 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 869 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 870 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 871 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 872 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 873 } 874 } else { 875 for (i=0;i<pcbddc->local_primal_size;i++) { 876 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 877 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 878 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 879 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 880 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 881 } 882 } 883 ierr = MatDestroy(&B);CHKERRQ(ierr); 884 /* coarse basis functions */ 885 for (i=0;i<pcbddc->local_primal_size;i++) { 886 PetscScalar *y; 887 888 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 889 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 890 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 891 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 892 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 893 if (i<n_vertices) { 894 y[n_B*i+idx_V_B[i]] = 1.0; 895 } 896 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 897 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 898 899 if (pcbddc->switch_static || pcbddc->dbg_flag) { 900 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 901 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 902 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 903 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 904 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 905 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 906 } 907 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 908 } 909 ierr = MatDestroy(&X);CHKERRQ(ierr); 910 } 911 912 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 913 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 914 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 915 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 916 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 917 /* Checking coarse_sub_mat and coarse basis functios */ 918 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 919 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 920 if (pcbddc->dbg_flag) { 921 Mat coarse_sub_mat; 922 Mat AUXMAT,TM1,TM2,TM3,TM4; 923 Mat coarse_phi_D,coarse_phi_B; 924 Mat coarse_psi_D,coarse_psi_B; 925 Mat A_II,A_BB,A_IB,A_BI; 926 MatType checkmattype=MATSEQAIJ; 927 PetscReal real_value; 928 929 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 930 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 931 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 932 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 933 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 934 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 935 if (unsymmetric_check) { 936 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 937 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 938 } 939 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 940 941 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 942 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 943 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 944 if (unsymmetric_check) { 945 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 946 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 947 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 948 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 949 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 950 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 951 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 952 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 953 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 954 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 955 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 956 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 957 } else { 958 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 959 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 960 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 961 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 962 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 963 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 964 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 965 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 966 } 967 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 968 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 969 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 970 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 971 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 972 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 973 974 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 975 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 976 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 977 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 978 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 979 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 980 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 981 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 982 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 983 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 984 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 985 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 986 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 987 if (unsymmetric_check) { 988 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 989 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 990 } 991 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 992 } 993 994 /* free memory */ 995 ierr = PetscFree(work);CHKERRQ(ierr); 996 if (n_vertices) { 997 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 998 } 999 if (n_constraints) { 1000 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1001 } 1002 /* get back data */ 1003 *coarse_submat_vals_n = coarse_submat_vals; 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNCT__ 1008 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1009 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B) 1010 { 1011 Mat *work_mat; 1012 IS isrow_s,iscol_s; 1013 PetscBool rsorted,csorted; 1014 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1015 PetscErrorCode ierr; 1016 1017 PetscFunctionBegin; 1018 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1019 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1020 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1021 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1022 1023 if (!rsorted) { 1024 const PetscInt *idxs; 1025 PetscInt *idxs_sorted,i; 1026 1027 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1028 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1029 for (i=0;i<rsize;i++) { 1030 idxs_perm_r[i] = i; 1031 } 1032 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1033 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1034 for (i=0;i<rsize;i++) { 1035 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1036 } 1037 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1038 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1039 } else { 1040 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1041 isrow_s = isrow; 1042 } 1043 1044 if (!csorted) { 1045 if (isrow == iscol) { 1046 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1047 iscol_s = isrow_s; 1048 } else { 1049 const PetscInt *idxs; 1050 PetscInt *idxs_sorted,i; 1051 1052 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1053 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1054 for (i=0;i<csize;i++) { 1055 idxs_perm_c[i] = i; 1056 } 1057 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1058 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1059 for (i=0;i<csize;i++) { 1060 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1061 } 1062 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1063 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1064 } 1065 } else { 1066 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1067 iscol_s = iscol; 1068 } 1069 1070 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr); 1071 1072 if (!rsorted || !csorted) { 1073 Mat new_mat; 1074 IS is_perm_r,is_perm_c; 1075 1076 if (!rsorted) { 1077 PetscInt *idxs_r,i; 1078 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1079 for (i=0;i<rsize;i++) { 1080 idxs_r[idxs_perm_r[i]] = i; 1081 } 1082 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1083 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1084 } else { 1085 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1086 } 1087 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1088 1089 if (!csorted) { 1090 if (isrow_s == iscol_s) { 1091 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1092 is_perm_c = is_perm_r; 1093 } else { 1094 PetscInt *idxs_c,i; 1095 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1096 for (i=0;i<csize;i++) { 1097 idxs_c[idxs_perm_c[i]] = i; 1098 } 1099 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1100 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1101 } 1102 } else { 1103 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1104 } 1105 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1106 1107 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1108 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1109 work_mat[0] = new_mat; 1110 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1111 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1112 } 1113 1114 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1115 *B = work_mat[0]; 1116 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1117 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1118 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1124 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1125 { 1126 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1127 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1128 Mat new_mat; 1129 IS is_local,is_global; 1130 PetscInt local_size; 1131 PetscBool isseqaij; 1132 PetscErrorCode ierr; 1133 1134 PetscFunctionBegin; 1135 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1136 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1137 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1138 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1139 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1140 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr); 1141 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1142 1143 /* check */ 1144 if (pcbddc->dbg_flag) { 1145 Vec x,x_change; 1146 PetscReal error; 1147 1148 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1149 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1150 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1151 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1152 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1153 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1154 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1155 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1156 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1157 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1158 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1159 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1160 ierr = VecDestroy(&x);CHKERRQ(ierr); 1161 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1162 } 1163 1164 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1165 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1166 if (isseqaij) { 1167 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1168 } else { 1169 Mat work_mat; 1170 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1171 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1172 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1173 } 1174 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1175 /* 1176 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1177 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1178 */ 1179 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1185 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1186 { 1187 PC_IS* pcis = (PC_IS*)(pc->data); 1188 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1189 IS is_aux1,is_aux2; 1190 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1191 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1192 PetscInt vbs,bs; 1193 PetscBT bitmask; 1194 PetscErrorCode ierr; 1195 1196 PetscFunctionBegin; 1197 /* 1198 No need to setup local scatters if 1199 - primal space is unchanged 1200 AND 1201 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1202 AND 1203 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1204 */ 1205 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1206 PetscFunctionReturn(0); 1207 } 1208 /* destroy old objects */ 1209 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1210 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1211 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1212 /* Set Non-overlapping dimensions */ 1213 n_B = pcis->n_B; n_D = pcis->n - n_B; 1214 n_vertices = pcbddc->n_actual_vertices; 1215 /* create auxiliary bitmask */ 1216 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1217 for (i=0;i<n_vertices;i++) { 1218 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1219 } 1220 1221 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1222 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1223 for (i=0, n_R=0; i<pcis->n; i++) { 1224 if (!PetscBTLookup(bitmask,i)) { 1225 idx_R_local[n_R] = i; 1226 n_R++; 1227 } 1228 } 1229 1230 /* Block code */ 1231 vbs = 1; 1232 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1233 if (bs>1 && !(n_vertices%bs)) { 1234 PetscBool is_blocked = PETSC_TRUE; 1235 PetscInt *vary; 1236 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1237 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1238 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1239 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1240 for (i=0; i<n_vertices; i++) { 1241 if (vary[i]!=0 && vary[i]!=bs) { 1242 is_blocked = PETSC_FALSE; 1243 break; 1244 } 1245 } 1246 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1247 vbs = bs; 1248 for (i=0;i<n_R/vbs;i++) { 1249 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1250 } 1251 } 1252 ierr = PetscFree(vary);CHKERRQ(ierr); 1253 } 1254 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1255 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1256 1257 /* print some info if requested */ 1258 if (pcbddc->dbg_flag) { 1259 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1260 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1261 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1262 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1263 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1264 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1266 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1267 } 1268 1269 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1270 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1271 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1272 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1273 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1274 for (i=0; i<n_D; i++) { 1275 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1276 } 1277 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1278 for (i=0, j=0; i<n_R; i++) { 1279 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1280 aux_array1[j++] = i; 1281 } 1282 } 1283 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1284 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1285 for (i=0, j=0; i<n_B; i++) { 1286 if (!PetscBTLookup(bitmask,is_indices[i])) { 1287 aux_array2[j++] = i; 1288 } 1289 } 1290 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1291 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1292 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1293 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1294 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1295 1296 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1297 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1298 for (i=0, j=0; i<n_R; i++) { 1299 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1300 aux_array1[j++] = i; 1301 } 1302 } 1303 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1304 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1305 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1306 } 1307 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1308 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1309 PetscFunctionReturn(0); 1310 } 1311 1312 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1315 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1316 { 1317 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1318 PC_IS *pcis = (PC_IS*)pc->data; 1319 PC pc_temp; 1320 Mat A_RR; 1321 MatReuse reuse; 1322 PetscScalar m_one = -1.0; 1323 PetscReal value; 1324 PetscInt n_D,n_R,ibs,mbs; 1325 PetscBool use_exact,use_exact_reduced,issbaij; 1326 PetscErrorCode ierr; 1327 /* prefixes stuff */ 1328 char dir_prefix[256],neu_prefix[256],str_level[16]; 1329 size_t len; 1330 1331 PetscFunctionBegin; 1332 1333 /* compute prefixes */ 1334 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1335 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1336 if (!pcbddc->current_level) { 1337 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1338 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1339 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1340 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1341 } else { 1342 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1343 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1344 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1345 len -= 15; /* remove "pc_bddc_coarse_" */ 1346 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1347 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1348 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1349 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1350 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1351 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1352 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1353 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1354 } 1355 1356 /* DIRICHLET PROBLEM */ 1357 if (dirichlet) { 1358 /* Matrix for Dirichlet problem is pcis->A_II */ 1359 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1360 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1361 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1362 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1363 /* default */ 1364 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1365 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1366 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1367 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1368 if (issbaij) { 1369 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1370 } else { 1371 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1372 } 1373 /* Allow user's customization */ 1374 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1375 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1376 } 1377 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1378 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1379 if (!n_D) { 1380 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1381 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1382 } 1383 /* Set Up KSP for Dirichlet problem of BDDC */ 1384 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1385 /* set ksp_D into pcis data */ 1386 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1387 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1388 pcis->ksp_D = pcbddc->ksp_D; 1389 } 1390 1391 /* NEUMANN PROBLEM */ 1392 A_RR = 0; 1393 if (neumann) { 1394 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1395 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1396 if (pcbddc->ksp_R) { /* already created ksp */ 1397 PetscInt nn_R; 1398 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1399 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1400 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1401 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1402 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1403 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1404 reuse = MAT_INITIAL_MATRIX; 1405 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1406 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1407 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1408 reuse = MAT_INITIAL_MATRIX; 1409 } else { /* safe to reuse the matrix */ 1410 reuse = MAT_REUSE_MATRIX; 1411 } 1412 } 1413 /* last check */ 1414 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1415 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1416 reuse = MAT_INITIAL_MATRIX; 1417 } 1418 } else { /* first time, so we need to create the matrix */ 1419 reuse = MAT_INITIAL_MATRIX; 1420 } 1421 /* extract A_RR */ 1422 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1423 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1424 if (ibs != mbs) { 1425 Mat newmat; 1426 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1427 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1428 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1429 } else { 1430 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1431 } 1432 if (!pcbddc->ksp_R) { /* create object if not present */ 1433 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1434 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1435 /* default */ 1436 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1437 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1438 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1439 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1440 if (issbaij) { 1441 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1442 } else { 1443 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1444 } 1445 /* Allow user's customization */ 1446 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1447 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1448 } 1449 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1450 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1451 if (!n_R) { 1452 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1453 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1454 } 1455 /* Set Up KSP for Neumann problem of BDDC */ 1456 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1457 } 1458 1459 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1460 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1461 if (pcbddc->dbg_flag) { 1462 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1463 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1464 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1465 } 1466 if (dirichlet) { /* Dirichlet */ 1467 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1468 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1469 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1470 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1471 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1472 /* need to be adapted? */ 1473 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1474 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1475 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1476 /* print info */ 1477 if (pcbddc->dbg_flag) { 1478 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1479 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1480 } 1481 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1482 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1483 } 1484 } 1485 if (neumann) { /* Neumann */ 1486 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1487 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1488 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1489 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1490 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1491 /* need to be adapted? */ 1492 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1493 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1494 /* print info */ 1495 if (pcbddc->dbg_flag) { 1496 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1497 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1498 } 1499 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1500 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1501 } 1502 } 1503 } 1504 /* free Neumann problem's matrix */ 1505 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1506 PetscFunctionReturn(0); 1507 } 1508 1509 #undef __FUNCT__ 1510 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1511 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1512 { 1513 PetscErrorCode ierr; 1514 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1515 1516 PetscFunctionBegin; 1517 if (applytranspose) { 1518 if (pcbddc->local_auxmat1) { 1519 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1520 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1521 } 1522 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1523 } else { 1524 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1525 if (pcbddc->local_auxmat1) { 1526 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1527 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1528 } 1529 } 1530 PetscFunctionReturn(0); 1531 } 1532 1533 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1534 #undef __FUNCT__ 1535 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1536 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1537 { 1538 PetscErrorCode ierr; 1539 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1540 PC_IS* pcis = (PC_IS*) (pc->data); 1541 const PetscScalar zero = 0.0; 1542 1543 PetscFunctionBegin; 1544 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1545 if (applytranspose) { 1546 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1547 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1548 } else { 1549 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1550 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1551 } 1552 /* start communications from local primal nodes to rhs of coarse solver */ 1553 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1554 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1555 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1556 1557 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1558 /* TODO remove null space when doing multilevel */ 1559 if (pcbddc->coarse_ksp) { 1560 if (applytranspose) { 1561 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1562 } else { 1563 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1564 } 1565 } 1566 1567 /* Local solution on R nodes */ 1568 if (pcis->n) { 1569 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1570 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1571 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1572 if (pcbddc->switch_static) { 1573 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1574 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1575 } 1576 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1577 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1578 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1579 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1580 if (pcbddc->switch_static) { 1581 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1582 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1583 } 1584 } 1585 1586 /* communications from coarse sol to local primal nodes */ 1587 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1588 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1589 1590 /* Sum contributions from two levels */ 1591 if (applytranspose) { 1592 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1593 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1594 } else { 1595 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1596 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1597 } 1598 PetscFunctionReturn(0); 1599 } 1600 1601 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1604 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1605 { 1606 PetscErrorCode ierr; 1607 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1608 PetscScalar *array,*array2; 1609 Vec from,to; 1610 1611 PetscFunctionBegin; 1612 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1613 from = pcbddc->coarse_vec; 1614 to = pcbddc->vec1_P; 1615 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1616 Vec tvec; 1617 PetscInt lsize; 1618 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1619 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1620 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1621 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1622 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1623 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1624 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1625 } 1626 } else { /* from local to global -> put data in coarse right hand side */ 1627 from = pcbddc->vec1_P; 1628 to = pcbddc->coarse_vec; 1629 } 1630 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1631 PetscFunctionReturn(0); 1632 } 1633 1634 #undef __FUNCT__ 1635 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1636 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1637 { 1638 PetscErrorCode ierr; 1639 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1640 PetscScalar *array,*array2; 1641 Vec from,to; 1642 1643 PetscFunctionBegin; 1644 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1645 from = pcbddc->coarse_vec; 1646 to = pcbddc->vec1_P; 1647 } else { /* from local to global -> put data in coarse right hand side */ 1648 from = pcbddc->vec1_P; 1649 to = pcbddc->coarse_vec; 1650 } 1651 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1652 if (smode == SCATTER_FORWARD) { 1653 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1654 Vec tvec; 1655 PetscInt lsize; 1656 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1657 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1658 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1659 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1660 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1661 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1662 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1663 } 1664 } 1665 PetscFunctionReturn(0); 1666 } 1667 1668 /* uncomment for testing purposes */ 1669 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1670 #undef __FUNCT__ 1671 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1672 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1673 { 1674 PetscErrorCode ierr; 1675 PC_IS* pcis = (PC_IS*)(pc->data); 1676 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1677 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1678 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1679 MatType impMatType=MATSEQAIJ; 1680 /* one and zero */ 1681 PetscScalar one=1.0,zero=0.0; 1682 /* space to store constraints and their local indices */ 1683 PetscScalar *temp_quadrature_constraint; 1684 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1685 /* iterators */ 1686 PetscInt i,j,k,total_counts,temp_start_ptr; 1687 /* stuff to store connected components stored in pcbddc->mat_graph */ 1688 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1689 PetscInt n_ISForFaces,n_ISForEdges; 1690 /* near null space stuff */ 1691 MatNullSpace nearnullsp; 1692 const Vec *nearnullvecs; 1693 Vec *localnearnullsp; 1694 PetscBool nnsp_has_cnst; 1695 PetscInt nnsp_size; 1696 PetscScalar *array; 1697 /* BLAS integers */ 1698 PetscBLASInt lwork,lierr; 1699 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1700 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1701 /* LAPACK working arrays for SVD or POD */ 1702 PetscBool skip_lapack; 1703 PetscScalar *work; 1704 PetscReal *singular_vals; 1705 #if defined(PETSC_USE_COMPLEX) 1706 PetscReal *rwork; 1707 #endif 1708 #if defined(PETSC_MISSING_LAPACK_GESVD) 1709 PetscBLASInt Blas_one_2=1; 1710 PetscScalar *temp_basis,*correlation_mat; 1711 #else 1712 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1713 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1714 #endif 1715 /* reuse */ 1716 PetscInt olocal_primal_size; 1717 PetscInt *oprimal_indices_local_idxs; 1718 /* change of basis */ 1719 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1720 PetscBool boolforchange,qr_needed; 1721 PetscBT touched,change_basis,qr_needed_idx; 1722 /* auxiliary stuff */ 1723 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1724 PetscInt ncc,*gidxs,*permutation,*temp_indices_to_constraint_work; 1725 PetscScalar *temp_quadrature_constraint_work; 1726 /* some quantities */ 1727 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1728 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1729 1730 1731 PetscFunctionBegin; 1732 /* Destroy Mat objects computed previously */ 1733 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1734 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1735 /* TODO synch with adaptive selection */ 1736 /* Get index sets for faces, edges and vertices from graph */ 1737 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1738 /* free unneeded index sets */ 1739 if (!pcbddc->use_vertices) { 1740 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1741 } 1742 if (!pcbddc->use_edges) { 1743 for (i=0;i<n_ISForEdges;i++) { 1744 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1745 } 1746 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1747 n_ISForEdges = 0; 1748 } 1749 if (!pcbddc->use_faces) { 1750 for (i=0;i<n_ISForFaces;i++) { 1751 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1752 } 1753 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1754 n_ISForFaces = 0; 1755 } 1756 /* HACKS (the following two blocks of code) */ 1757 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1758 pcbddc->use_change_of_basis = PETSC_TRUE; 1759 if (!ISForEdges) { 1760 pcbddc->use_change_on_faces = PETSC_TRUE; 1761 } 1762 } 1763 if (pcbddc->NullSpace) { 1764 /* use_change_of_basis should be consistent among processors */ 1765 PetscBool tbool[2],gbool[2]; 1766 tbool [0] = pcbddc->use_change_of_basis; 1767 tbool [1] = pcbddc->use_change_on_faces; 1768 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1769 pcbddc->use_change_of_basis = gbool[0]; 1770 pcbddc->use_change_on_faces = gbool[1]; 1771 } 1772 /* print some info */ 1773 if (pcbddc->dbg_flag) { 1774 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1775 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1776 i = 0; 1777 if (ISForVertices) { 1778 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1779 } 1780 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1781 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1782 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1783 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1784 } 1785 1786 if (!pcbddc->adaptive_selection) { 1787 /* check if near null space is attached to global mat */ 1788 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1789 if (nearnullsp) { 1790 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1791 /* remove any stored info */ 1792 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1793 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1794 /* store information for BDDC solver reuse */ 1795 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1796 pcbddc->onearnullspace = nearnullsp; 1797 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1798 for (i=0;i<nnsp_size;i++) { 1799 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1800 } 1801 } else { /* if near null space is not provided BDDC uses constants by default */ 1802 nnsp_size = 0; 1803 nnsp_has_cnst = PETSC_TRUE; 1804 } 1805 /* get max number of constraints on a single cc */ 1806 max_constraints = nnsp_size; 1807 if (nnsp_has_cnst) max_constraints++; 1808 1809 /* 1810 Evaluate maximum storage size needed by the procedure 1811 - temp_indices will contain start index of each constraint stored as follows 1812 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1813 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1814 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1815 */ 1816 total_counts = n_ISForFaces+n_ISForEdges; 1817 total_counts *= max_constraints; 1818 n_vertices = 0; 1819 if (ISForVertices) { 1820 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1821 } 1822 total_counts += n_vertices; 1823 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1824 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1825 total_counts = 0; 1826 max_size_of_constraint = 0; 1827 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1828 if (i<n_ISForEdges) { 1829 used_IS = &ISForEdges[i]; 1830 } else { 1831 used_IS = &ISForFaces[i-n_ISForEdges]; 1832 } 1833 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1834 total_counts += j; 1835 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1836 } 1837 total_counts *= max_constraints; 1838 total_counts += n_vertices; 1839 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1840 /* get local part of global near null space vectors */ 1841 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1842 for (k=0;k<nnsp_size;k++) { 1843 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1844 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1845 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1846 } 1847 1848 /* whether or not to skip lapack calls */ 1849 skip_lapack = PETSC_TRUE; 1850 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1851 1852 /* allocate some auxiliary stuff */ 1853 if (!skip_lapack || pcbddc->use_qr_single) { 1854 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1855 } 1856 1857 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1858 if (!skip_lapack) { 1859 PetscScalar temp_work; 1860 1861 #if defined(PETSC_MISSING_LAPACK_GESVD) 1862 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1863 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1864 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1865 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1866 #if defined(PETSC_USE_COMPLEX) 1867 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1868 #endif 1869 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1870 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1871 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1872 lwork = -1; 1873 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1874 #if !defined(PETSC_USE_COMPLEX) 1875 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1876 #else 1877 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1878 #endif 1879 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1880 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1881 #else /* on missing GESVD */ 1882 /* SVD */ 1883 PetscInt max_n,min_n; 1884 max_n = max_size_of_constraint; 1885 min_n = max_constraints; 1886 if (max_size_of_constraint < max_constraints) { 1887 min_n = max_size_of_constraint; 1888 max_n = max_constraints; 1889 } 1890 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1891 #if defined(PETSC_USE_COMPLEX) 1892 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1893 #endif 1894 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1895 lwork = -1; 1896 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1897 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1898 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1899 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1900 #if !defined(PETSC_USE_COMPLEX) 1901 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1902 #else 1903 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1904 #endif 1905 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1906 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1907 #endif /* on missing GESVD */ 1908 /* Allocate optimal workspace */ 1909 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1910 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1911 } 1912 /* Now we can loop on constraining sets */ 1913 total_counts = 0; 1914 temp_indices[0] = 0; 1915 /* vertices */ 1916 if (ISForVertices) { 1917 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1918 if (nnsp_has_cnst) { /* consider all vertices */ 1919 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1920 for (i=0;i<n_vertices;i++) { 1921 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1922 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1923 total_counts++; 1924 } 1925 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1926 PetscBool used_vertex; 1927 for (i=0;i<n_vertices;i++) { 1928 used_vertex = PETSC_FALSE; 1929 k = 0; 1930 while (!used_vertex && k<nnsp_size) { 1931 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1932 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1933 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1934 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1935 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1936 total_counts++; 1937 used_vertex = PETSC_TRUE; 1938 } 1939 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1940 k++; 1941 } 1942 } 1943 } 1944 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1945 n_vertices = total_counts; 1946 } 1947 1948 /* edges and faces */ 1949 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1950 if (ncc<n_ISForEdges) { 1951 used_IS = &ISForEdges[ncc]; 1952 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1953 } else { 1954 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1955 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1956 } 1957 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1958 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1959 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1960 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1961 /* change of basis should not be performed on local periodic nodes */ 1962 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1963 if (nnsp_has_cnst) { 1964 PetscScalar quad_value; 1965 temp_constraints++; 1966 if (!pcbddc->use_nnsp_true) { 1967 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1968 } else { 1969 quad_value = 1.0; 1970 } 1971 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1972 for (j=0;j<size_of_constraint;j++) { 1973 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1974 } 1975 /* sort by global ordering if using lapack subroutines (not needed!) */ 1976 if (!skip_lapack || pcbddc->use_qr_single) { 1977 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1978 for (j=0;j<size_of_constraint;j++) { 1979 permutation[j]=j; 1980 } 1981 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1982 for (j=0;j<size_of_constraint;j++) { 1983 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 1984 } 1985 for (j=0;j<size_of_constraint;j++) { 1986 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1987 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1988 } 1989 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1990 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1991 } 1992 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1993 total_counts++; 1994 } 1995 for (k=0;k<nnsp_size;k++) { 1996 PetscReal real_value; 1997 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1998 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1999 for (j=0;j<size_of_constraint;j++) { 2000 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2001 } 2002 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2003 /* check if array is null on the connected component */ 2004 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2005 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2006 if (real_value > 0.0) { /* keep indices and values */ 2007 /* sort by global ordering if using lapack subroutines */ 2008 if (!skip_lapack || pcbddc->use_qr_single) { 2009 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2010 for (j=0;j<size_of_constraint;j++) { 2011 permutation[j]=j; 2012 } 2013 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2014 for (j=0;j<size_of_constraint;j++) { 2015 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2016 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2017 } 2018 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2019 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2020 } 2021 temp_constraints++; 2022 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2023 total_counts++; 2024 } 2025 } 2026 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2027 valid_constraints = temp_constraints; 2028 if (!pcbddc->use_nnsp_true && temp_constraints) { 2029 if (temp_constraints == 1) { /* just normalize the constraint */ 2030 PetscScalar norm; 2031 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2032 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2033 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2034 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2035 } else { /* perform SVD */ 2036 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2037 2038 #if defined(PETSC_MISSING_LAPACK_GESVD) 2039 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2040 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2041 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2042 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2043 from that computed using LAPACKgesvd 2044 -> This is due to a different computation of eigenvectors in LAPACKheev 2045 -> The quality of the POD-computed basis will be the same */ 2046 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2047 /* Store upper triangular part of correlation matrix */ 2048 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2049 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2050 for (j=0;j<temp_constraints;j++) { 2051 for (k=0;k<j+1;k++) { 2052 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 2053 } 2054 } 2055 /* compute eigenvalues and eigenvectors of correlation matrix */ 2056 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2057 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2058 #if !defined(PETSC_USE_COMPLEX) 2059 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2060 #else 2061 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2062 #endif 2063 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2064 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2065 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2066 j = 0; 2067 while (j < temp_constraints && singular_vals[j] < tol) j++; 2068 total_counts = total_counts-j; 2069 valid_constraints = temp_constraints-j; 2070 /* scale and copy POD basis into used quadrature memory */ 2071 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2072 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2073 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2074 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2075 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2076 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2077 if (j<temp_constraints) { 2078 PetscInt ii; 2079 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2080 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2081 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2082 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2083 for (k=0;k<temp_constraints-j;k++) { 2084 for (ii=0;ii<size_of_constraint;ii++) { 2085 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2086 } 2087 } 2088 } 2089 #else /* on missing GESVD */ 2090 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2091 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2092 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2093 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2094 #if !defined(PETSC_USE_COMPLEX) 2095 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 2096 #else 2097 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 2098 #endif 2099 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2100 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2101 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2102 k = temp_constraints; 2103 if (k > size_of_constraint) k = size_of_constraint; 2104 j = 0; 2105 while (j < k && singular_vals[k-j-1] < tol) j++; 2106 valid_constraints = k-j; 2107 total_counts = total_counts-temp_constraints+valid_constraints; 2108 #endif /* on missing GESVD */ 2109 } 2110 } 2111 /* setting change_of_basis flag is safe now */ 2112 if (boolforchange) { 2113 for (j=0;j<valid_constraints;j++) { 2114 PetscBTSet(change_basis,total_counts-j-1); 2115 } 2116 } 2117 } 2118 /* free workspace */ 2119 if (!skip_lapack || pcbddc->use_qr_single) { 2120 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2121 } 2122 if (!skip_lapack) { 2123 ierr = PetscFree(work);CHKERRQ(ierr); 2124 #if defined(PETSC_USE_COMPLEX) 2125 ierr = PetscFree(rwork);CHKERRQ(ierr); 2126 #endif 2127 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2128 #if defined(PETSC_MISSING_LAPACK_GESVD) 2129 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2130 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2131 #endif 2132 } 2133 for (k=0;k<nnsp_size;k++) { 2134 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2135 } 2136 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2137 } else { 2138 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2139 PetscInt cum = 0; 2140 2141 total_counts = 0; 2142 n_vertices = 0; 2143 if (sub_schurs->is_Ej_com) { 2144 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2145 } 2146 max_constraints = 0; 2147 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2148 total_counts += pcbddc->adaptive_constraints_n[i]; 2149 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2150 } 2151 temp_indices = pcbddc->adaptive_constraints_ptrs; 2152 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2153 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2154 2155 #if 0 2156 printf("Found %d totals\n",total_counts); 2157 for (i=0;i<total_counts;i++) { 2158 printf("const %d, start %d",i,temp_indices[i]); 2159 printf(" end %d:\n",temp_indices[i+1]); 2160 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2161 printf(" idxs %d",temp_indices_to_constraint[j]); 2162 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2163 } 2164 } 2165 #endif 2166 2167 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2168 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2169 /* Change of basis */ 2170 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2171 if (pcbddc->use_change_of_basis) { 2172 cum = n_vertices; 2173 for (i=0;i<sub_schurs->n_subs;i++) { 2174 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2175 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2176 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2177 } 2178 } 2179 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2180 } 2181 } 2182 } 2183 2184 /* free index sets of faces, edges and vertices */ 2185 for (i=0;i<n_ISForFaces;i++) { 2186 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2187 } 2188 if (n_ISForFaces) { 2189 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2190 } 2191 for (i=0;i<n_ISForEdges;i++) { 2192 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2193 } 2194 if (n_ISForEdges) { 2195 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2196 } 2197 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2198 2199 /* map temp_indices_to_constraint in boundary numbering */ 2200 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2201 if (i != temp_indices[total_counts]) { 2202 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2203 } 2204 2205 /* set quantities in pcbddc data structure and store previous primal size */ 2206 /* n_vertices defines the number of subdomain corners in the primal space */ 2207 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2208 olocal_primal_size = pcbddc->local_primal_size; 2209 pcbddc->local_primal_size = total_counts; 2210 pcbddc->n_vertices = n_vertices; 2211 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2212 2213 /* Create constraint matrix */ 2214 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2215 /* so we need to set it up properly either with or without change of basis */ 2216 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2217 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2218 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2219 /* array to compute a local numbering of constraints : vertices first then constraints */ 2220 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2221 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2222 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2223 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2224 /* auxiliary stuff for basis change */ 2225 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2226 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2227 2228 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2229 total_primal_vertices=0; 2230 for (i=0;i<pcbddc->local_primal_size;i++) { 2231 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2232 if (size_of_constraint == 1) { 2233 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2234 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2235 aux_primal_minloc[total_primal_vertices]=0; 2236 total_primal_vertices++; 2237 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2238 PetscInt min_loc,min_index; 2239 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2240 /* find first untouched local node */ 2241 k = 0; 2242 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2243 min_index = global_indices[k]; 2244 min_loc = k; 2245 /* search the minimum among global nodes already untouched on the cc */ 2246 for (k=1;k<size_of_constraint;k++) { 2247 /* there can be more than one constraint on a single connected component */ 2248 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2249 min_index = global_indices[k]; 2250 min_loc = k; 2251 } 2252 } 2253 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2254 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2255 aux_primal_minloc[total_primal_vertices]=min_loc; 2256 total_primal_vertices++; 2257 } 2258 } 2259 /* determine if a QR strategy is needed for change of basis */ 2260 qr_needed = PETSC_FALSE; 2261 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2262 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2263 if (PetscBTLookup(change_basis,i)) { 2264 if (!pcbddc->use_qr_single) { 2265 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2266 j = 0; 2267 for (k=0;k<size_of_constraint;k++) { 2268 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2269 j++; 2270 } 2271 } 2272 /* found more than one primal dof on the cc */ 2273 if (j > 1) { 2274 PetscBTSet(qr_needed_idx,i); 2275 qr_needed = PETSC_TRUE; 2276 } 2277 } else { 2278 PetscBTSet(qr_needed_idx,i); 2279 qr_needed = PETSC_TRUE; 2280 } 2281 } 2282 } 2283 /* free workspace */ 2284 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2285 2286 /* permute indices in order to have a sorted set of vertices */ 2287 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2288 2289 /* nonzero structure of constraint matrix */ 2290 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2291 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2292 j=total_primal_vertices; 2293 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2294 if (!PetscBTLookup(change_basis,i)) { 2295 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2296 j++; 2297 } 2298 } 2299 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2300 ierr = PetscFree(nnz);CHKERRQ(ierr); 2301 /* set values in constraint matrix */ 2302 for (i=0;i<total_primal_vertices;i++) { 2303 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2304 } 2305 total_counts = total_primal_vertices; 2306 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2307 if (!PetscBTLookup(change_basis,i)) { 2308 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2309 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2310 total_counts++; 2311 } 2312 } 2313 /* assembling */ 2314 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2315 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2316 /* 2317 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2318 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2319 */ 2320 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2321 if (pcbddc->use_change_of_basis) { 2322 /* dual and primal dofs on a single cc */ 2323 PetscInt dual_dofs,primal_dofs; 2324 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2325 PetscInt primal_counter; 2326 /* working stuff for GEQRF */ 2327 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2328 PetscBLASInt lqr_work; 2329 /* working stuff for UNGQR */ 2330 PetscScalar *gqr_work,lgqr_work_t; 2331 PetscBLASInt lgqr_work; 2332 /* working stuff for TRTRS */ 2333 PetscScalar *trs_rhs; 2334 PetscBLASInt Blas_NRHS; 2335 /* pointers for values insertion into change of basis matrix */ 2336 PetscInt *start_rows,*start_cols; 2337 PetscScalar *start_vals; 2338 /* working stuff for values insertion */ 2339 PetscBT is_primal; 2340 /* matrix sizes */ 2341 PetscInt global_size,local_size; 2342 /* temporary change of basis */ 2343 Mat localChangeOfBasisMatrix; 2344 2345 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2346 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2347 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2348 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2349 2350 /* nonzeros for local mat */ 2351 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2352 for (i=0;i<pcis->n;i++) nnz[i]=1; 2353 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2354 if (PetscBTLookup(change_basis,i)) { 2355 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2356 if (PetscBTLookup(qr_needed_idx,i)) { 2357 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2358 } else { 2359 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2; 2360 /* get local primal index on the cc */ 2361 j = 0; 2362 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2363 nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2364 } 2365 } 2366 } 2367 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2368 ierr = PetscFree(nnz);CHKERRQ(ierr); 2369 /* Set initial identity in the matrix */ 2370 for (i=0;i<pcis->n;i++) { 2371 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2372 } 2373 2374 if (pcbddc->dbg_flag) { 2375 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2376 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2377 } 2378 2379 2380 /* Now we loop on the constraints which need a change of basis */ 2381 /* 2382 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2383 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2384 2385 Basic blocks of change of basis matrix T computed by 2386 2387 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2388 2389 | 1 0 ... 0 s_1/S | 2390 | 0 1 ... 0 s_2/S | 2391 | ... | 2392 | 0 ... 1 s_{n-1}/S | 2393 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2394 2395 with S = \sum_{i=1}^n s_i^2 2396 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2397 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2398 2399 - QR decomposition of constraints otherwise 2400 */ 2401 if (qr_needed) { 2402 /* space to store Q */ 2403 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2404 /* first we issue queries for optimal work */ 2405 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2406 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2407 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2408 lqr_work = -1; 2409 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2410 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2411 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2412 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2413 lgqr_work = -1; 2414 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2415 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2416 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2417 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2418 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2419 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2420 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2421 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2422 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2423 /* array to store scaling factors for reflectors */ 2424 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2425 /* array to store rhs and solution of triangular solver */ 2426 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2427 /* allocating workspace for check */ 2428 if (pcbddc->dbg_flag) { 2429 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2430 } 2431 } 2432 /* array to store whether a node is primal or not */ 2433 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2434 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2435 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2436 if (i != total_primal_vertices) { 2437 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2438 } 2439 for (i=0;i<total_primal_vertices;i++) { 2440 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2441 } 2442 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2443 2444 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2445 /* -> using implicit ordering contained in temp_indices data */ 2446 total_counts = pcbddc->n_vertices; 2447 primal_counter = total_counts; 2448 while (total_counts<pcbddc->local_primal_size) { 2449 primal_dofs = 1; 2450 if (PetscBTLookup(change_basis,total_counts)) { 2451 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2452 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) { 2453 primal_dofs++; 2454 } 2455 /* get constraint info */ 2456 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2457 dual_dofs = size_of_constraint-primal_dofs; 2458 2459 if (pcbddc->dbg_flag) { 2460 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2461 } 2462 2463 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2464 2465 /* copy quadrature constraints for change of basis check */ 2466 if (pcbddc->dbg_flag) { 2467 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2468 } 2469 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2470 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2471 2472 /* compute QR decomposition of constraints */ 2473 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2474 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2475 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2476 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2477 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2478 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2479 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2480 2481 /* explictly compute R^-T */ 2482 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2483 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2484 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2485 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2486 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2487 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2488 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2489 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2490 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2491 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2492 2493 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2494 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2495 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2496 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2497 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2498 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2499 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2500 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2501 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2502 2503 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2504 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2505 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2506 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2507 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2508 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2509 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2510 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2511 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2512 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2513 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2514 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2515 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2516 2517 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2518 start_rows = &temp_indices_to_constraint[temp_indices[total_counts]]; 2519 /* insert cols for primal dofs */ 2520 for (j=0;j<primal_dofs;j++) { 2521 start_vals = &qr_basis[j*size_of_constraint]; 2522 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2523 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2524 } 2525 /* insert cols for dual dofs */ 2526 for (j=0,k=0;j<dual_dofs;k++) { 2527 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2528 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2529 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k]; 2530 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2531 j++; 2532 } 2533 } 2534 2535 /* check change of basis */ 2536 if (pcbddc->dbg_flag) { 2537 PetscInt ii,jj; 2538 PetscBool valid_qr=PETSC_TRUE; 2539 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2540 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2541 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2542 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2543 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2544 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2545 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2546 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2547 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2548 for (jj=0;jj<size_of_constraint;jj++) { 2549 for (ii=0;ii<primal_dofs;ii++) { 2550 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2551 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2552 } 2553 } 2554 if (!valid_qr) { 2555 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2556 for (jj=0;jj<size_of_constraint;jj++) { 2557 for (ii=0;ii<primal_dofs;ii++) { 2558 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2559 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2560 } 2561 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2562 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2563 } 2564 } 2565 } 2566 } else { 2567 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2568 } 2569 } 2570 } else { /* simple transformation block */ 2571 PetscInt row,col; 2572 PetscScalar val,norm; 2573 2574 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2575 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2576 for (j=0;j<size_of_constraint;j++) { 2577 PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2578 row = temp_indices_to_constraint[temp_indices[total_counts]+j]; 2579 if (!PetscBTLookup(is_primal,row_B)) { 2580 col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2581 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2582 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2583 } else { 2584 for (k=0;k<size_of_constraint;k++) { 2585 col = temp_indices_to_constraint[temp_indices[total_counts]+k]; 2586 if (row != col) { 2587 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2588 } else { 2589 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2590 } 2591 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2592 } 2593 } 2594 } 2595 if (pcbddc->dbg_flag) { 2596 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2597 } 2598 } 2599 /* increment primal counter */ 2600 primal_counter += primal_dofs; 2601 } else { 2602 if (pcbddc->dbg_flag) { 2603 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2604 } 2605 } 2606 /* increment constraint counter total_counts */ 2607 total_counts += primal_dofs; 2608 } 2609 2610 /* free workspace */ 2611 if (qr_needed) { 2612 if (pcbddc->dbg_flag) { 2613 ierr = PetscFree(work);CHKERRQ(ierr); 2614 } 2615 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2616 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2617 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2618 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2619 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2620 } 2621 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2622 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2623 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2624 2625 /* assembling of global change of variable */ 2626 { 2627 Mat tmat; 2628 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2629 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2630 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2631 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2632 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2633 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2634 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2635 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2636 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2637 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2638 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2639 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2640 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2641 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2642 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2643 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2644 } 2645 /* check */ 2646 if (pcbddc->dbg_flag) { 2647 PetscReal error; 2648 Vec x,x_change; 2649 2650 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2651 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2652 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2653 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2654 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2655 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2656 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2657 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2658 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2659 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2660 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2661 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2662 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2663 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2664 ierr = VecDestroy(&x);CHKERRQ(ierr); 2665 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2666 } 2667 2668 /* adapt sub_schurs computed (if any) */ 2669 if (pcbddc->use_deluxe_scaling) { 2670 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2671 if (sub_schurs->n_subs_par_g) { 2672 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented"); 2673 } 2674 if (sub_schurs->S_Ej_all) { 2675 Mat S_1,S_2,tmat; 2676 IS is_all_N; 2677 2678 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2679 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2680 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2681 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2682 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2683 sub_schurs->S_Ej_all = S_1; 2684 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2685 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2686 sub_schurs->sum_S_Ej_all = S_2; 2687 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2688 } 2689 } 2690 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2691 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2692 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2693 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2694 } 2695 2696 /* set up change of basis context */ 2697 if (pcbddc->ChangeOfBasisMatrix) { 2698 PCBDDCChange_ctx change_ctx; 2699 2700 if (!pcbddc->new_global_mat) { 2701 PetscInt global_size,local_size; 2702 2703 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2704 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2705 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2706 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2707 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2708 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2709 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2710 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2711 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2712 } else { 2713 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2714 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2715 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2716 } 2717 if (!pcbddc->user_ChangeOfBasisMatrix) { 2718 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2719 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2720 } else { 2721 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2722 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2723 } 2724 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2725 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2726 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2727 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2728 } 2729 2730 /* get indices in local ordering for vertices and constraints */ 2731 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2732 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2733 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2734 } 2735 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2736 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2737 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2738 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2739 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2740 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2741 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2742 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2743 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2744 /* set quantities in PCBDDC data struct */ 2745 pcbddc->n_actual_vertices = i; 2746 /* check if a new primal space has been introduced */ 2747 pcbddc->new_primal_space_local = PETSC_TRUE; 2748 if (olocal_primal_size == pcbddc->local_primal_size) { 2749 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2750 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2751 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2752 } 2753 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2754 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2755 2756 /* flush dbg viewer */ 2757 if (pcbddc->dbg_flag) { 2758 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2759 } 2760 2761 /* free workspace */ 2762 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2763 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2764 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2765 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2766 if (!pcbddc->adaptive_selection) { 2767 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2768 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2769 } else { 2770 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2771 pcbddc->adaptive_constraints_ptrs, 2772 pcbddc->adaptive_constraints_idxs, 2773 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2774 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2775 } 2776 PetscFunctionReturn(0); 2777 } 2778 2779 #undef __FUNCT__ 2780 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2781 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2782 { 2783 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2784 PC_IS *pcis = (PC_IS*)pc->data; 2785 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2786 PetscInt ierr,i,vertex_size; 2787 PetscViewer viewer=pcbddc->dbg_viewer; 2788 2789 PetscFunctionBegin; 2790 /* Reset previously computed graph */ 2791 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2792 /* Init local Graph struct */ 2793 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2794 2795 /* Check validity of the csr graph passed in by the user */ 2796 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2797 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2798 } 2799 2800 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2801 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2802 Mat mat_adj; 2803 PetscInt *xadj,*adjncy; 2804 PetscInt nvtxs; 2805 PetscBool flg_row=PETSC_TRUE; 2806 2807 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2808 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2809 if (!flg_row) { 2810 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2811 } 2812 if (pcbddc->use_local_adj) { 2813 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2814 pcbddc->computed_rowadj = PETSC_TRUE; 2815 } else { /* just compute subdomain's connected components */ 2816 IS is_dummy; 2817 ISLocalToGlobalMapping l2gmap_dummy; 2818 PetscInt j,sum; 2819 PetscInt *cxadj,*cadjncy; 2820 const PetscInt *idxs; 2821 PCBDDCGraph graph; 2822 PetscBT is_on_boundary; 2823 2824 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2825 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2826 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2827 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2828 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2829 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2830 graph->xadj = xadj; 2831 graph->adjncy = adjncy; 2832 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2833 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2834 2835 if (pcbddc->dbg_flag) { 2836 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2837 for (i=0;i<graph->ncc;i++) { 2838 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2839 } 2840 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2841 } 2842 2843 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2844 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2845 for (i=0;i<pcis->n_B;i++) { 2846 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2847 } 2848 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2849 2850 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2851 sum = 0; 2852 for (i=0;i<graph->ncc;i++) { 2853 PetscInt sizecc = 0; 2854 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2855 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2856 sizecc++; 2857 } 2858 } 2859 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2860 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2861 cxadj[graph->queue[j]] = sizecc; 2862 } 2863 } 2864 sum += sizecc*sizecc; 2865 } 2866 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2867 sum = 0; 2868 for (i=0;i<nvtxs;i++) { 2869 PetscInt temp = cxadj[i]; 2870 cxadj[i] = sum; 2871 sum += temp; 2872 } 2873 cxadj[nvtxs] = sum; 2874 for (i=0;i<graph->ncc;i++) { 2875 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2876 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2877 PetscInt k,sizecc = 0; 2878 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2879 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2880 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2881 sizecc++; 2882 } 2883 } 2884 } 2885 } 2886 } 2887 if (nvtxs) { 2888 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2889 } else { 2890 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2891 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2892 } 2893 graph->xadj = 0; 2894 graph->adjncy = 0; 2895 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2896 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2897 } 2898 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2899 if (!flg_row) { 2900 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2901 } 2902 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2903 } 2904 2905 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2906 vertex_size = 1; 2907 if (pcbddc->user_provided_isfordofs) { 2908 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2909 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2910 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2911 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2912 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2913 } 2914 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2915 pcbddc->n_ISForDofs = 0; 2916 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2917 } 2918 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2919 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2920 } else { 2921 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2922 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2923 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2924 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2925 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2926 } 2927 } 2928 } 2929 2930 /* Setup of Graph */ 2931 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2932 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2933 } 2934 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2935 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2936 } 2937 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2938 2939 /* Graph's connected components analysis */ 2940 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2941 2942 /* print some info to stdout */ 2943 if (pcbddc->dbg_flag) { 2944 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2945 } 2946 2947 /* mark topography has done */ 2948 pcbddc->recompute_topography = PETSC_FALSE; 2949 PetscFunctionReturn(0); 2950 } 2951 2952 #undef __FUNCT__ 2953 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2954 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2955 { 2956 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2957 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2958 PetscErrorCode ierr; 2959 2960 PetscFunctionBegin; 2961 n = 0; 2962 vertices = 0; 2963 if (pcbddc->ConstraintMatrix) { 2964 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2965 for (i=0;i<local_primal_size;i++) { 2966 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2967 if (size_of_constraint == 1) n++; 2968 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2969 } 2970 if (vertices_idx) { 2971 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2972 n = 0; 2973 for (i=0;i<local_primal_size;i++) { 2974 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2975 if (size_of_constraint == 1) { 2976 vertices[n++]=row_cmat_indices[0]; 2977 } 2978 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2979 } 2980 } 2981 } 2982 *n_vertices = n; 2983 if (vertices_idx) *vertices_idx = vertices; 2984 PetscFunctionReturn(0); 2985 } 2986 2987 #undef __FUNCT__ 2988 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2989 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2990 { 2991 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2992 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2993 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2994 PetscBT touched; 2995 PetscErrorCode ierr; 2996 2997 /* This function assumes that the number of local constraints per connected component 2998 is not greater than the number of nodes defined for the connected component 2999 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3000 PetscFunctionBegin; 3001 n = 0; 3002 constraints_index = 0; 3003 if (pcbddc->ConstraintMatrix) { 3004 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3005 max_size_of_constraint = 0; 3006 for (i=0;i<local_primal_size;i++) { 3007 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3008 if (size_of_constraint > 1) { 3009 n++; 3010 } 3011 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3012 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3013 } 3014 if (constraints_idx) { 3015 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3016 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3017 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3018 n = 0; 3019 for (i=0;i<local_primal_size;i++) { 3020 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3021 if (size_of_constraint > 1) { 3022 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3023 /* find first untouched local node */ 3024 j = 0; 3025 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3026 min_index = row_cmat_global_indices[j]; 3027 min_loc = j; 3028 /* search the minimum among nodes not yet touched on the connected component 3029 since there can be more than one constraint on a single cc */ 3030 for (j=1;j<size_of_constraint;j++) { 3031 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3032 min_index = row_cmat_global_indices[j]; 3033 min_loc = j; 3034 } 3035 } 3036 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3037 constraints_index[n++] = row_cmat_indices[min_loc]; 3038 } 3039 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3040 } 3041 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3042 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3043 } 3044 } 3045 *n_constraints = n; 3046 if (constraints_idx) *constraints_idx = constraints_index; 3047 PetscFunctionReturn(0); 3048 } 3049 3050 #undef __FUNCT__ 3051 #define __FUNCT__ "PCBDDCSubsetNumbering" 3052 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3053 { 3054 Vec local_vec,global_vec; 3055 IS seqis,paris; 3056 VecScatter scatter_ctx; 3057 PetscScalar *array; 3058 PetscInt *temp_global_dofs; 3059 PetscScalar globalsum; 3060 PetscInt i,j,s; 3061 PetscInt nlocals,first_index,old_index,max_local; 3062 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 3063 PetscMPIInt *dof_sizes,*dof_displs; 3064 PetscBool first_found; 3065 PetscErrorCode ierr; 3066 3067 PetscFunctionBegin; 3068 /* mpi buffers */ 3069 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3070 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3071 j = ( !rank_prec_comm ? size_prec_comm : 0); 3072 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 3073 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 3074 /* get maximum size of subset */ 3075 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3076 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3077 max_local = 0; 3078 for (i=0;i<n_local_dofs;i++) { 3079 if (max_local < temp_global_dofs[i] ) { 3080 max_local = temp_global_dofs[i]; 3081 } 3082 } 3083 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3084 max_global++; 3085 max_local = 0; 3086 for (i=0;i<n_local_dofs;i++) { 3087 if (max_local < local_dofs[i] ) { 3088 max_local = local_dofs[i]; 3089 } 3090 } 3091 max_local++; 3092 /* allocate workspace */ 3093 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3094 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3095 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3096 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3097 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3098 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3099 /* create scatter */ 3100 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3101 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3102 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3103 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3104 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3105 /* init array */ 3106 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3107 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3108 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3109 if (local_dofs_mult) { 3110 for (i=0;i<n_local_dofs;i++) { 3111 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3112 } 3113 } else { 3114 for (i=0;i<n_local_dofs;i++) { 3115 array[local_dofs[i]]=1.0; 3116 } 3117 } 3118 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3119 /* scatter into global vec and get total number of global dofs */ 3120 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3121 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3122 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3123 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3124 /* Fill global_vec with cumulative function for global numbering */ 3125 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3126 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3127 nlocals = 0; 3128 first_index = -1; 3129 first_found = PETSC_FALSE; 3130 for (i=0;i<s;i++) { 3131 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3132 first_found = PETSC_TRUE; 3133 first_index = i; 3134 } 3135 nlocals += (PetscInt)PetscRealPart(array[i]); 3136 } 3137 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3138 if (!rank_prec_comm) { 3139 dof_displs[0]=0; 3140 for (i=1;i<size_prec_comm;i++) { 3141 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3142 } 3143 } 3144 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3145 if (first_found) { 3146 array[first_index] += (PetscScalar)nlocals; 3147 old_index = first_index; 3148 for (i=first_index+1;i<s;i++) { 3149 if (PetscRealPart(array[i]) > 0.1) { 3150 array[i] += array[old_index]; 3151 old_index = i; 3152 } 3153 } 3154 } 3155 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3156 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3157 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3158 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3159 /* get global ordering of local dofs */ 3160 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3161 if (local_dofs_mult) { 3162 for (i=0;i<n_local_dofs;i++) { 3163 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3164 } 3165 } else { 3166 for (i=0;i<n_local_dofs;i++) { 3167 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3168 } 3169 } 3170 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3171 /* free workspace */ 3172 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3173 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3174 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3175 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3176 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3177 /* return pointer to global ordering of local dofs */ 3178 *global_numbering_subset = temp_global_dofs; 3179 PetscFunctionReturn(0); 3180 } 3181 3182 #undef __FUNCT__ 3183 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3184 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3185 { 3186 PetscInt i,j; 3187 PetscScalar *alphas; 3188 PetscErrorCode ierr; 3189 3190 PetscFunctionBegin; 3191 /* this implements stabilized Gram-Schmidt */ 3192 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3193 for (i=0;i<n;i++) { 3194 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3195 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3196 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3197 } 3198 ierr = PetscFree(alphas);CHKERRQ(ierr); 3199 PetscFunctionReturn(0); 3200 } 3201 3202 #undef __FUNCT__ 3203 #define __FUNCT__ "MatISGetSubassemblingPattern" 3204 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3205 { 3206 Mat subdomain_adj; 3207 IS new_ranks,ranks_send_to; 3208 MatPartitioning partitioner; 3209 Mat_IS *matis; 3210 PetscInt n_neighs,*neighs,*n_shared,**shared; 3211 PetscInt prank; 3212 PetscMPIInt size,rank,color; 3213 PetscInt *xadj,*adjncy,*oldranks; 3214 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3215 PetscInt i,local_size,threshold=0; 3216 PetscErrorCode ierr; 3217 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3218 PetscSubcomm subcomm; 3219 3220 PetscFunctionBegin; 3221 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3222 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3223 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3224 3225 /* Get info on mapping */ 3226 matis = (Mat_IS*)(mat->data); 3227 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3228 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3229 3230 /* build local CSR graph of subdomains' connectivity */ 3231 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3232 xadj[0] = 0; 3233 xadj[1] = PetscMax(n_neighs-1,0); 3234 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3235 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3236 3237 if (threshold) { 3238 PetscInt xadj_count = 0; 3239 for (i=1;i<n_neighs;i++) { 3240 if (n_shared[i] > threshold) { 3241 adjncy[xadj_count] = neighs[i]; 3242 adjncy_wgt[xadj_count] = n_shared[i]; 3243 xadj_count++; 3244 } 3245 } 3246 xadj[1] = xadj_count; 3247 } else { 3248 if (xadj[1]) { 3249 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3250 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3251 } 3252 } 3253 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3254 if (use_square) { 3255 for (i=0;i<xadj[1];i++) { 3256 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3257 } 3258 } 3259 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3260 3261 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3262 3263 /* 3264 Restrict work on active processes only. 3265 */ 3266 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3267 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3268 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3269 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3270 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3271 if (color) { 3272 ierr = PetscFree(xadj);CHKERRQ(ierr); 3273 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3274 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3275 } else { 3276 PetscInt coarsening_ratio; 3277 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 3278 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3279 prank = rank; 3280 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 3281 /* 3282 for (i=0;i<size;i++) { 3283 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3284 } 3285 */ 3286 for (i=0;i<xadj[1];i++) { 3287 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3288 } 3289 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3290 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3291 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3292 3293 /* Partition */ 3294 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 3295 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3296 if (use_vwgt) { 3297 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3298 v_wgt[0] = local_size; 3299 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3300 } 3301 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3302 coarsening_ratio = size/n_subdomains; 3303 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3304 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3305 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3306 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3307 3308 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3309 if (contiguous) { 3310 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3311 } else { 3312 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3313 } 3314 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3315 /* clean up */ 3316 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3317 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3318 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3319 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3320 } 3321 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3322 3323 /* assemble parallel IS for sends */ 3324 i = 1; 3325 if (color) i=0; 3326 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3327 3328 /* get back IS */ 3329 *is_sends = ranks_send_to; 3330 PetscFunctionReturn(0); 3331 } 3332 3333 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3334 3335 #undef __FUNCT__ 3336 #define __FUNCT__ "MatISSubassemble" 3337 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3338 { 3339 Mat local_mat; 3340 Mat_IS *matis; 3341 IS is_sends_internal; 3342 PetscInt rows,cols,new_local_rows; 3343 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3344 PetscBool ismatis,isdense,newisdense,destroy_mat; 3345 ISLocalToGlobalMapping l2gmap; 3346 PetscInt* l2gmap_indices; 3347 const PetscInt* is_indices; 3348 MatType new_local_type; 3349 /* buffers */ 3350 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3351 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3352 PetscInt *recv_buffer_idxs_local; 3353 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3354 /* MPI */ 3355 MPI_Comm comm,comm_n; 3356 PetscSubcomm subcomm; 3357 PetscMPIInt n_sends,n_recvs,commsize; 3358 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3359 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3360 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3361 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3362 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3363 PetscErrorCode ierr; 3364 3365 PetscFunctionBegin; 3366 /* TODO: add missing checks */ 3367 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3368 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3369 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3370 PetscValidLogicalCollectiveInt(mat,nis,7); 3371 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3372 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3373 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3374 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3375 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3376 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3377 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3378 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3379 PetscInt mrows,mcols,mnrows,mncols; 3380 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3381 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3382 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3383 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3384 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3385 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3386 } 3387 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3388 PetscValidLogicalCollectiveInt(mat,bs,0); 3389 /* prepare IS for sending if not provided */ 3390 if (!is_sends) { 3391 PetscBool pcontig = PETSC_TRUE; 3392 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3393 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3394 } else { 3395 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3396 is_sends_internal = is_sends; 3397 } 3398 3399 /* get pointer of MATIS data */ 3400 matis = (Mat_IS*)mat->data; 3401 3402 /* get comm */ 3403 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3404 3405 /* compute number of sends */ 3406 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3407 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3408 3409 /* compute number of receives */ 3410 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3411 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3412 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3413 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3414 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3415 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3416 ierr = PetscFree(iflags);CHKERRQ(ierr); 3417 3418 /* restrict comm if requested */ 3419 subcomm = 0; 3420 destroy_mat = PETSC_FALSE; 3421 if (restrict_comm) { 3422 PetscMPIInt color,rank,subcommsize; 3423 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3424 color = 0; 3425 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3426 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3427 subcommsize = commsize - subcommsize; 3428 /* check if reuse has been requested */ 3429 if (reuse == MAT_REUSE_MATRIX) { 3430 if (*mat_n) { 3431 PetscMPIInt subcommsize2; 3432 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3433 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3434 comm_n = PetscObjectComm((PetscObject)*mat_n); 3435 } else { 3436 comm_n = PETSC_COMM_SELF; 3437 } 3438 } else { /* MAT_INITIAL_MATRIX */ 3439 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3440 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3441 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3442 comm_n = subcomm->comm; 3443 } 3444 /* flag to destroy *mat_n if not significative */ 3445 if (color) destroy_mat = PETSC_TRUE; 3446 } else { 3447 comm_n = comm; 3448 } 3449 3450 /* prepare send/receive buffers */ 3451 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3452 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3453 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3454 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3455 if (nis) { 3456 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3457 } 3458 3459 /* Get data from local matrices */ 3460 if (!isdense) { 3461 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3462 /* TODO: See below some guidelines on how to prepare the local buffers */ 3463 /* 3464 send_buffer_vals should contain the raw values of the local matrix 3465 send_buffer_idxs should contain: 3466 - MatType_PRIVATE type 3467 - PetscInt size_of_l2gmap 3468 - PetscInt global_row_indices[size_of_l2gmap] 3469 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3470 */ 3471 } else { 3472 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3473 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3474 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3475 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3476 send_buffer_idxs[1] = i; 3477 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3478 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3479 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3480 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3481 for (i=0;i<n_sends;i++) { 3482 ilengths_vals[is_indices[i]] = len*len; 3483 ilengths_idxs[is_indices[i]] = len+2; 3484 } 3485 } 3486 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3487 /* additional is (if any) */ 3488 if (nis) { 3489 PetscMPIInt psum; 3490 PetscInt j; 3491 for (j=0,psum=0;j<nis;j++) { 3492 PetscInt plen; 3493 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3494 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3495 psum += len+1; /* indices + lenght */ 3496 } 3497 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3498 for (j=0,psum=0;j<nis;j++) { 3499 PetscInt plen; 3500 const PetscInt *is_array_idxs; 3501 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3502 send_buffer_idxs_is[psum] = plen; 3503 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3504 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3505 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3506 psum += plen+1; /* indices + lenght */ 3507 } 3508 for (i=0;i<n_sends;i++) { 3509 ilengths_idxs_is[is_indices[i]] = psum; 3510 } 3511 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3512 } 3513 3514 buf_size_idxs = 0; 3515 buf_size_vals = 0; 3516 buf_size_idxs_is = 0; 3517 for (i=0;i<n_recvs;i++) { 3518 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3519 buf_size_vals += (PetscInt)olengths_vals[i]; 3520 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3521 } 3522 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3523 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3524 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3525 3526 /* get new tags for clean communications */ 3527 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3528 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3529 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3530 3531 /* allocate for requests */ 3532 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3533 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3534 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3535 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3536 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3537 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3538 3539 /* communications */ 3540 ptr_idxs = recv_buffer_idxs; 3541 ptr_vals = recv_buffer_vals; 3542 ptr_idxs_is = recv_buffer_idxs_is; 3543 for (i=0;i<n_recvs;i++) { 3544 source_dest = onodes[i]; 3545 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3546 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3547 ptr_idxs += olengths_idxs[i]; 3548 ptr_vals += olengths_vals[i]; 3549 if (nis) { 3550 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3551 ptr_idxs_is += olengths_idxs_is[i]; 3552 } 3553 } 3554 for (i=0;i<n_sends;i++) { 3555 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3556 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3557 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3558 if (nis) { 3559 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3560 } 3561 } 3562 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3563 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3564 3565 /* assemble new l2g map */ 3566 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3567 ptr_idxs = recv_buffer_idxs; 3568 new_local_rows = 0; 3569 for (i=0;i<n_recvs;i++) { 3570 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3571 ptr_idxs += olengths_idxs[i]; 3572 } 3573 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3574 ptr_idxs = recv_buffer_idxs; 3575 new_local_rows = 0; 3576 for (i=0;i<n_recvs;i++) { 3577 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3578 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3579 ptr_idxs += olengths_idxs[i]; 3580 } 3581 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3582 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3583 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3584 3585 /* infer new local matrix type from received local matrices type */ 3586 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3587 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3588 if (n_recvs) { 3589 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3590 ptr_idxs = recv_buffer_idxs; 3591 for (i=0;i<n_recvs;i++) { 3592 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3593 new_local_type_private = MATAIJ_PRIVATE; 3594 break; 3595 } 3596 ptr_idxs += olengths_idxs[i]; 3597 } 3598 switch (new_local_type_private) { 3599 case MATDENSE_PRIVATE: 3600 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3601 new_local_type = MATSEQAIJ; 3602 bs = 1; 3603 } else { /* if I receive only 1 dense matrix */ 3604 new_local_type = MATSEQDENSE; 3605 bs = 1; 3606 } 3607 break; 3608 case MATAIJ_PRIVATE: 3609 new_local_type = MATSEQAIJ; 3610 bs = 1; 3611 break; 3612 case MATBAIJ_PRIVATE: 3613 new_local_type = MATSEQBAIJ; 3614 break; 3615 case MATSBAIJ_PRIVATE: 3616 new_local_type = MATSEQSBAIJ; 3617 break; 3618 default: 3619 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3620 break; 3621 } 3622 } else { /* by default, new_local_type is seqdense */ 3623 new_local_type = MATSEQDENSE; 3624 bs = 1; 3625 } 3626 3627 /* create MATIS object if needed */ 3628 if (reuse == MAT_INITIAL_MATRIX) { 3629 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3630 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3631 } else { 3632 /* it also destroys the local matrices */ 3633 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3634 } 3635 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3636 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3637 3638 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3639 3640 /* Global to local map of received indices */ 3641 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3642 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3643 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3644 3645 /* restore attributes -> type of incoming data and its size */ 3646 buf_size_idxs = 0; 3647 for (i=0;i<n_recvs;i++) { 3648 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3649 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3650 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3651 } 3652 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3653 3654 /* set preallocation */ 3655 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3656 if (!newisdense) { 3657 PetscInt *new_local_nnz=0; 3658 3659 ptr_vals = recv_buffer_vals; 3660 ptr_idxs = recv_buffer_idxs_local; 3661 if (n_recvs) { 3662 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3663 } 3664 for (i=0;i<n_recvs;i++) { 3665 PetscInt j; 3666 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3667 for (j=0;j<*(ptr_idxs+1);j++) { 3668 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3669 } 3670 } else { 3671 /* TODO */ 3672 } 3673 ptr_idxs += olengths_idxs[i]; 3674 } 3675 if (new_local_nnz) { 3676 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3677 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3678 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3679 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3680 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3681 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3682 } else { 3683 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3684 } 3685 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3686 } else { 3687 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3688 } 3689 3690 /* set values */ 3691 ptr_vals = recv_buffer_vals; 3692 ptr_idxs = recv_buffer_idxs_local; 3693 for (i=0;i<n_recvs;i++) { 3694 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3695 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3696 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3697 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3698 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3699 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3700 } else { 3701 /* TODO */ 3702 } 3703 ptr_idxs += olengths_idxs[i]; 3704 ptr_vals += olengths_vals[i]; 3705 } 3706 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3707 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3708 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3709 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3710 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3711 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3712 3713 #if 0 3714 if (!restrict_comm) { /* check */ 3715 Vec lvec,rvec; 3716 PetscReal infty_error; 3717 3718 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3719 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3720 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3721 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3722 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3723 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3724 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3725 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3726 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3727 } 3728 #endif 3729 3730 /* assemble new additional is (if any) */ 3731 if (nis) { 3732 PetscInt **temp_idxs,*count_is,j,psum; 3733 3734 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3735 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3736 ptr_idxs = recv_buffer_idxs_is; 3737 psum = 0; 3738 for (i=0;i<n_recvs;i++) { 3739 for (j=0;j<nis;j++) { 3740 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3741 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3742 psum += plen; 3743 ptr_idxs += plen+1; /* shift pointer to received data */ 3744 } 3745 } 3746 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3747 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3748 for (i=1;i<nis;i++) { 3749 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3750 } 3751 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3752 ptr_idxs = recv_buffer_idxs_is; 3753 for (i=0;i<n_recvs;i++) { 3754 for (j=0;j<nis;j++) { 3755 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3756 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3757 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3758 ptr_idxs += plen+1; /* shift pointer to received data */ 3759 } 3760 } 3761 for (i=0;i<nis;i++) { 3762 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3763 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3764 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3765 } 3766 ierr = PetscFree(count_is);CHKERRQ(ierr); 3767 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3768 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3769 } 3770 /* free workspace */ 3771 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3772 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3773 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3774 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3775 if (isdense) { 3776 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3777 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3778 } else { 3779 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3780 } 3781 if (nis) { 3782 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3783 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3784 } 3785 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3786 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3787 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3788 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3789 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3790 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3791 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3792 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3793 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3794 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3795 ierr = PetscFree(onodes);CHKERRQ(ierr); 3796 if (nis) { 3797 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3798 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3799 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3800 } 3801 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3802 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3803 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3804 for (i=0;i<nis;i++) { 3805 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3806 } 3807 } 3808 PetscFunctionReturn(0); 3809 } 3810 3811 /* temporary hack into ksp private data structure */ 3812 #include <petsc-private/kspimpl.h> 3813 3814 #undef __FUNCT__ 3815 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3816 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3817 { 3818 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3819 PC_IS *pcis = (PC_IS*)pc->data; 3820 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3821 MatNullSpace CoarseNullSpace=NULL; 3822 ISLocalToGlobalMapping coarse_islg; 3823 IS coarse_is,*isarray; 3824 PetscInt i,im_active=-1,active_procs=-1; 3825 PetscInt nis,nisdofs,nisneu; 3826 PC pc_temp; 3827 PCType coarse_pc_type; 3828 KSPType coarse_ksp_type; 3829 PetscBool multilevel_requested,multilevel_allowed; 3830 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3831 Mat t_coarse_mat_is; 3832 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3833 PetscMPIInt all_procs; 3834 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3835 PetscBool compute_vecs = PETSC_FALSE; 3836 PetscScalar *array; 3837 PetscErrorCode ierr; 3838 3839 PetscFunctionBegin; 3840 /* Assign global numbering to coarse dofs */ 3841 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3842 compute_vecs = PETSC_TRUE; 3843 PetscInt ocoarse_size; 3844 ocoarse_size = pcbddc->coarse_size; 3845 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3846 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3847 /* see if we can avoid some work */ 3848 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3849 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3850 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3851 coarse_reuse = PETSC_FALSE; 3852 } else { /* we can safely reuse already computed coarse matrix */ 3853 coarse_reuse = PETSC_TRUE; 3854 } 3855 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3856 coarse_reuse = PETSC_FALSE; 3857 } 3858 /* reset any subassembling information */ 3859 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3860 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3861 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3862 coarse_reuse = PETSC_TRUE; 3863 } 3864 3865 /* count "active" (i.e. with positive local size) and "void" processes */ 3866 im_active = !!(pcis->n); 3867 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3868 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3869 void_procs = all_procs-active_procs; 3870 csin_type_simple = PETSC_TRUE; 3871 redist = PETSC_FALSE; 3872 if (pcbddc->current_level && void_procs) { 3873 csin_ml = PETSC_TRUE; 3874 ncoarse_ml = void_procs; 3875 csin_ds = PETSC_TRUE; 3876 ncoarse_ds = void_procs; 3877 } else { 3878 csin_ml = PETSC_FALSE; 3879 ncoarse_ml = all_procs; 3880 if (void_procs) { 3881 csin_ds = PETSC_TRUE; 3882 ncoarse_ds = void_procs; 3883 csin_type_simple = PETSC_FALSE; 3884 } else { 3885 if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) { 3886 csin_ds = PETSC_TRUE; 3887 ncoarse_ds = pcbddc->redistribute_coarse; 3888 redist = PETSC_TRUE; 3889 } else { 3890 csin_ds = PETSC_FALSE; 3891 ncoarse_ds = all_procs; 3892 } 3893 } 3894 } 3895 3896 /* 3897 test if we can go multilevel: three conditions must be satisfied: 3898 - we have not exceeded the number of levels requested 3899 - we can actually subassemble the active processes 3900 - we can find a suitable number of MPI processes where we can place the subassembled problem 3901 */ 3902 multilevel_allowed = PETSC_FALSE; 3903 multilevel_requested = PETSC_FALSE; 3904 if (pcbddc->current_level < pcbddc->max_levels) { 3905 multilevel_requested = PETSC_TRUE; 3906 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3907 multilevel_allowed = PETSC_FALSE; 3908 } else { 3909 multilevel_allowed = PETSC_TRUE; 3910 } 3911 } 3912 /* determine number of process partecipating to coarse solver */ 3913 if (multilevel_allowed) { 3914 ncoarse = ncoarse_ml; 3915 csin = csin_ml; 3916 } else { 3917 ncoarse = ncoarse_ds; 3918 csin = csin_ds; 3919 } 3920 3921 /* creates temporary l2gmap and IS for coarse indexes */ 3922 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3923 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3924 3925 /* creates temporary MATIS object for coarse matrix */ 3926 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3927 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3928 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3929 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3930 #if 0 3931 { 3932 PetscViewer viewer; 3933 char filename[256]; 3934 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3935 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3936 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3937 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3938 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3939 } 3940 #endif 3941 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3942 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3943 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3944 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3945 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3946 3947 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3948 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3949 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3950 const PetscInt *idxs; 3951 ISLocalToGlobalMapping tmap; 3952 3953 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3954 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3955 /* allocate space for temporary storage */ 3956 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3957 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3958 /* allocate for IS array */ 3959 nisdofs = pcbddc->n_ISForDofsLocal; 3960 nisneu = !!pcbddc->NeumannBoundariesLocal; 3961 nis = nisdofs + nisneu; 3962 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3963 /* dofs splitting */ 3964 for (i=0;i<nisdofs;i++) { 3965 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3966 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3967 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3968 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3969 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3970 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3971 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3972 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3973 } 3974 /* neumann boundaries */ 3975 if (pcbddc->NeumannBoundariesLocal) { 3976 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3977 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3978 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3979 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3980 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3981 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3982 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3983 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3984 } 3985 /* free memory */ 3986 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3987 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3988 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3989 } else { 3990 nis = 0; 3991 nisdofs = 0; 3992 nisneu = 0; 3993 isarray = NULL; 3994 } 3995 /* destroy no longer needed map */ 3996 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3997 3998 /* restrict on coarse candidates (if needed) */ 3999 coarse_mat_is = NULL; 4000 if (csin) { 4001 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4002 if (redist) { 4003 PetscMPIInt rank; 4004 PetscInt spc,n_spc_p1,dest[1]; 4005 4006 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4007 spc = all_procs/pcbddc->redistribute_coarse; 4008 n_spc_p1 = all_procs%pcbddc->redistribute_coarse; 4009 if (rank > n_spc_p1*(spc+1)-1) { 4010 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4011 } else { 4012 dest[0] = rank/(spc+1); 4013 } 4014 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4015 } else { 4016 PetscInt j,tissize,*nisindices; 4017 PetscInt *coarse_candidates; 4018 const PetscInt* tisindices; 4019 /* get coarse candidates' ranks in pc communicator */ 4020 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4021 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4022 for (i=0,j=0;i<all_procs;i++) { 4023 if (!coarse_candidates[i]) { 4024 coarse_candidates[j]=i; 4025 j++; 4026 } 4027 } 4028 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 4029 /* get a suitable subassembling pattern */ 4030 if (csin_type_simple) { 4031 PetscMPIInt rank; 4032 PetscInt issize,isidx; 4033 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4034 if (im_active) { 4035 issize = 1; 4036 isidx = (PetscInt)rank; 4037 } else { 4038 issize = 0; 4039 isidx = -1; 4040 } 4041 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4042 } else { 4043 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4044 } 4045 if (pcbddc->dbg_flag) { 4046 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4047 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4048 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4049 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4050 for (i=0;i<j;i++) { 4051 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4052 } 4053 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4054 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4055 } 4056 /* shift the pattern on coarse candidates */ 4057 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4058 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4059 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4060 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4061 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4062 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4063 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4064 } 4065 } 4066 if (pcbddc->dbg_flag) { 4067 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4068 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4069 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4070 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4071 } 4072 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4073 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4074 } else { 4075 if (pcbddc->dbg_flag) { 4076 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4077 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4078 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4079 } 4080 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4081 coarse_mat_is = t_coarse_mat_is; 4082 } 4083 4084 /* create local to global scatters for coarse problem */ 4085 if (compute_vecs) { 4086 PetscInt lrows; 4087 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4088 if (coarse_mat_is) { 4089 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4090 } else { 4091 lrows = 0; 4092 } 4093 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4094 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4095 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4096 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4097 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4098 } 4099 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4100 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4101 4102 /* set defaults for coarse KSP and PC */ 4103 if (multilevel_allowed) { 4104 coarse_ksp_type = KSPRICHARDSON; 4105 coarse_pc_type = PCBDDC; 4106 } else { 4107 coarse_ksp_type = KSPPREONLY; 4108 coarse_pc_type = PCREDUNDANT; 4109 } 4110 4111 /* print some info if requested */ 4112 if (pcbddc->dbg_flag) { 4113 if (!multilevel_allowed) { 4114 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4115 if (multilevel_requested) { 4116 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4117 } else if (pcbddc->max_levels) { 4118 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4119 } 4120 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4121 } 4122 } 4123 4124 /* create the coarse KSP object only once with defaults */ 4125 if (coarse_mat_is) { 4126 MatReuse coarse_mat_reuse; 4127 PetscViewer dbg_viewer = NULL; 4128 if (pcbddc->dbg_flag) { 4129 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4130 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4131 } 4132 if (!pcbddc->coarse_ksp) { 4133 char prefix[256],str_level[16]; 4134 size_t len; 4135 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4136 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4137 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4138 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4139 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4140 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4141 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4142 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4143 /* prefix */ 4144 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4145 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4146 if (!pcbddc->current_level) { 4147 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4148 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4149 } else { 4150 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4151 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4152 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4153 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4154 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4155 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4156 } 4157 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4158 /* allow user customization */ 4159 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4160 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4161 } 4162 4163 /* get some info after set from options */ 4164 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4165 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4166 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4167 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4168 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4169 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4170 isbddc = PETSC_FALSE; 4171 } 4172 if (isredundant) { 4173 KSP inner_ksp; 4174 PC inner_pc; 4175 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4176 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4177 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4178 } 4179 4180 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4181 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4182 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4183 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4184 if (nisdofs) { 4185 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4186 for (i=0;i<nisdofs;i++) { 4187 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4188 } 4189 } 4190 if (nisneu) { 4191 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4192 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4193 } 4194 4195 /* assemble coarse matrix */ 4196 if (coarse_reuse) { 4197 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4198 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4199 coarse_mat_reuse = MAT_REUSE_MATRIX; 4200 } else { 4201 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4202 } 4203 if (isbddc || isnn) { 4204 if (pcbddc->coarsening_ratio > 1) { 4205 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4206 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4207 if (pcbddc->dbg_flag) { 4208 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4209 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4210 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4211 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4212 } 4213 } 4214 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4215 } else { 4216 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4217 coarse_mat = coarse_mat_is; 4218 } 4219 } else { 4220 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4221 } 4222 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4223 4224 /* propagate symmetry info to coarse matrix */ 4225 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4226 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4227 4228 /* set operators */ 4229 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4230 if (pcbddc->dbg_flag) { 4231 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4232 } 4233 } else { /* processes non partecipating to coarse solver (if any) */ 4234 coarse_mat = 0; 4235 } 4236 ierr = PetscFree(isarray);CHKERRQ(ierr); 4237 #if 0 4238 { 4239 PetscViewer viewer; 4240 char filename[256]; 4241 sprintf(filename,"coarse_mat.m"); 4242 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4243 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4244 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4245 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4246 } 4247 #endif 4248 4249 /* Compute coarse null space (special handling by BDDC only) */ 4250 if (pcbddc->NullSpace) { 4251 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4252 } 4253 4254 if (pcbddc->coarse_ksp) { 4255 Vec crhs,csol; 4256 PetscBool ispreonly; 4257 if (CoarseNullSpace) { 4258 if (isbddc) { 4259 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4260 } else { 4261 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4262 } 4263 } 4264 /* setup coarse ksp */ 4265 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4266 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4267 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4268 /* hack */ 4269 if (!csol) { 4270 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4271 } 4272 if (!crhs) { 4273 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4274 } 4275 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4276 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4277 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4278 KSP check_ksp; 4279 KSPType check_ksp_type; 4280 PC check_pc; 4281 Vec check_vec,coarse_vec; 4282 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4283 PetscInt its; 4284 PetscBool compute_eigs; 4285 PetscReal *eigs_r,*eigs_c; 4286 PetscInt neigs; 4287 const char *prefix; 4288 4289 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4290 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4291 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4292 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4293 if (ispreonly) { 4294 check_ksp_type = KSPPREONLY; 4295 compute_eigs = PETSC_FALSE; 4296 } else { 4297 check_ksp_type = KSPGMRES; 4298 compute_eigs = PETSC_TRUE; 4299 } 4300 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4301 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4302 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4303 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4304 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4305 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4306 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4307 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4308 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4309 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4310 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4311 /* create random vec */ 4312 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4313 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4314 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4315 if (CoarseNullSpace) { 4316 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4317 } 4318 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4319 /* solve coarse problem */ 4320 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4321 if (CoarseNullSpace) { 4322 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4323 } 4324 /* set eigenvalue estimation if preonly has not been requested */ 4325 if (compute_eigs) { 4326 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4327 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4328 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4329 lambda_max = eigs_r[neigs-1]; 4330 lambda_min = eigs_r[0]; 4331 if (pcbddc->use_coarse_estimates) { 4332 if (lambda_max>lambda_min) { 4333 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4334 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4335 } 4336 } 4337 } 4338 4339 /* check coarse problem residual error */ 4340 if (pcbddc->dbg_flag) { 4341 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4342 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4343 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4344 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4345 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4346 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4347 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4348 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4349 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4350 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4351 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4352 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4353 if (compute_eigs) { 4354 PetscReal lambda_max_s,lambda_min_s; 4355 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4356 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4357 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4358 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4359 for (i=0;i<neigs;i++) { 4360 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4361 } 4362 } 4363 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4364 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4365 } 4366 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4367 if (compute_eigs) { 4368 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4369 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4370 } 4371 } 4372 } 4373 /* print additional info */ 4374 if (pcbddc->dbg_flag) { 4375 /* waits until all processes reaches this point */ 4376 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4377 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4378 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4379 } 4380 4381 /* free memory */ 4382 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4383 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4384 PetscFunctionReturn(0); 4385 } 4386 4387 #undef __FUNCT__ 4388 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4389 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4390 { 4391 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4392 PC_IS* pcis = (PC_IS*)pc->data; 4393 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4394 PetscInt i,coarse_size; 4395 PetscInt *local_primal_indices; 4396 PetscErrorCode ierr; 4397 4398 PetscFunctionBegin; 4399 /* Compute global number of coarse dofs */ 4400 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4401 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4402 } 4403 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4404 4405 /* check numbering */ 4406 if (pcbddc->dbg_flag) { 4407 PetscScalar coarsesum,*array; 4408 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4409 4410 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4411 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4412 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4413 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4414 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4415 for (i=0;i<pcbddc->local_primal_size;i++) { 4416 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4417 } 4418 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4419 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4420 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4421 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4422 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4423 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4424 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4425 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4426 for (i=0;i<pcis->n;i++) { 4427 if (array[i] == 1.0) { 4428 set_error = PETSC_TRUE; 4429 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4430 } 4431 } 4432 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4433 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4434 for (i=0;i<pcis->n;i++) { 4435 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4436 } 4437 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4438 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4439 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4440 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4441 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4442 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4443 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4444 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4445 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4446 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4447 for (i=0;i<pcbddc->local_primal_size;i++) { 4448 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4449 } 4450 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4451 } 4452 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4453 if (set_error_reduced) { 4454 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4455 } 4456 } 4457 /* get back data */ 4458 *coarse_size_n = coarse_size; 4459 *local_primal_indices_n = local_primal_indices; 4460 PetscFunctionReturn(0); 4461 } 4462 4463 #undef __FUNCT__ 4464 #define __FUNCT__ "PCBDDCGlobalToLocal" 4465 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4466 { 4467 IS localis_t; 4468 PetscInt i,lsize,*idxs,n; 4469 PetscScalar *vals; 4470 PetscErrorCode ierr; 4471 4472 PetscFunctionBegin; 4473 /* get indices in local ordering exploiting local to global map */ 4474 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4475 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4476 for (i=0;i<lsize;i++) vals[i] = 1.0; 4477 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4478 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4479 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4480 if (idxs) { /* multilevel guard */ 4481 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4482 } 4483 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4484 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4485 ierr = PetscFree(vals);CHKERRQ(ierr); 4486 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4487 /* now compute set in local ordering */ 4488 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4489 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4490 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4491 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4492 for (i=0,lsize=0;i<n;i++) { 4493 if (PetscRealPart(vals[i]) > 0.5) { 4494 lsize++; 4495 } 4496 } 4497 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4498 for (i=0,lsize=0;i<n;i++) { 4499 if (PetscRealPart(vals[i]) > 0.5) { 4500 idxs[lsize++] = i; 4501 } 4502 } 4503 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4504 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4505 *localis = localis_t; 4506 PetscFunctionReturn(0); 4507 } 4508 4509 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4510 #undef __FUNCT__ 4511 #define __FUNCT__ "PCBDDCMatMult_Private" 4512 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4513 { 4514 PCBDDCChange_ctx change_ctx; 4515 PetscErrorCode ierr; 4516 4517 PetscFunctionBegin; 4518 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4519 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4520 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4521 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4522 PetscFunctionReturn(0); 4523 } 4524 4525 #undef __FUNCT__ 4526 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4527 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4528 { 4529 PCBDDCChange_ctx change_ctx; 4530 PetscErrorCode ierr; 4531 4532 PetscFunctionBegin; 4533 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4534 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4535 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4536 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4537 PetscFunctionReturn(0); 4538 } 4539 4540 #undef __FUNCT__ 4541 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4542 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4543 { 4544 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4545 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4546 PetscInt *used_xadj,*used_adjncy; 4547 PetscBool free_used_adj; 4548 PetscErrorCode ierr; 4549 4550 PetscFunctionBegin; 4551 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4552 free_used_adj = PETSC_FALSE; 4553 if (pcbddc->sub_schurs_layers == -1) { 4554 used_xadj = NULL; 4555 used_adjncy = NULL; 4556 } else { 4557 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4558 used_xadj = pcbddc->mat_graph->xadj; 4559 used_adjncy = pcbddc->mat_graph->adjncy; 4560 } else if (pcbddc->computed_rowadj) { 4561 used_xadj = pcbddc->mat_graph->xadj; 4562 used_adjncy = pcbddc->mat_graph->adjncy; 4563 } else { 4564 Mat mat_adj; 4565 PetscBool flg_row=PETSC_TRUE; 4566 const PetscInt *xadj,*adjncy; 4567 PetscInt nvtxs; 4568 4569 ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 4570 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4571 if (!flg_row) { 4572 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 4573 } 4574 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4575 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4576 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4577 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4578 if (!flg_row) { 4579 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 4580 } 4581 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 4582 free_used_adj = PETSC_TRUE; 4583 } 4584 } 4585 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4586 4587 /* free adjacency */ 4588 if (free_used_adj) { 4589 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4590 } 4591 PetscFunctionReturn(0); 4592 } 4593 4594 #undef __FUNCT__ 4595 #define __FUNCT__ "PCBDDCInitSubSchurs" 4596 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4597 { 4598 PC_IS *pcis=(PC_IS*)pc->data; 4599 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4600 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4601 PCBDDCGraph graph; 4602 Mat S_j; 4603 PetscErrorCode ierr; 4604 4605 PetscFunctionBegin; 4606 /* attach interface graph for determining subsets */ 4607 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4608 IS verticesIS; 4609 4610 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4611 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4612 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4613 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4614 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4615 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4616 /* 4617 if (pcbddc->dbg_flag) { 4618 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4619 } 4620 */ 4621 } else { 4622 graph = pcbddc->mat_graph; 4623 } 4624 4625 /* Create Schur complement matrix */ 4626 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4627 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4628 4629 /* sub_schurs init */ 4630 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr); 4631 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4632 /* free graph struct */ 4633 if (pcbddc->sub_schurs_rebuild) { 4634 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4635 } 4636 PetscFunctionReturn(0); 4637 } 4638