1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) { 32 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented"); 33 } 34 35 /* max size of subsets */ 36 mss = 0; 37 for (i=0;i<sub_schurs->n_subs;i++) { 38 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 39 PetscInt subset_size; 40 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 41 mss = PetscMax(mss,subset_size); 42 } 43 } 44 45 /* min/max and threshold */ 46 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 47 nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1; 48 nmax = PetscMax(nmin,nmax); 49 if (pcbddc->adaptive_threshold > 1.0) { 50 thresh = 1.0/pcbddc->adaptive_threshold; 51 } else { 52 thresh = 1.0; 53 } 54 55 /* allocate lapack workspace */ 56 cum = cum2 = 0; 57 maxneigs = 0; 58 for (i=0;i<sub_schurs->n_subs;i++) { 59 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 60 PetscInt n,subset_size; 61 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 62 n = PetscMin(subset_size,nmax); 63 cum += subset_size*n; 64 cum2 += n; 65 maxneigs = PetscMax(maxneigs,n); 66 } 67 } 68 69 if (mss) { 70 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 71 PetscBLASInt B_itype = 1; 72 PetscBLASInt B_N = mss; 73 PetscScalar zero = 0.0; 74 PetscScalar eps = 0.0; /* dlamch? */ 75 76 B_lwork = -1; 77 S = NULL; 78 St = NULL; 79 eigs = NULL; 80 eigv = NULL; 81 B_iwork = NULL; 82 B_ifail = NULL; 83 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 84 #if defined(PETSC_USE_COMPLEX) 85 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 86 #else 87 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 88 #endif 89 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 90 ierr = PetscFPTrapPop();CHKERRQ(ierr); 91 } else { 92 /* TODO */ 93 } 94 } else { 95 lwork = 0; 96 } 97 98 nv = 0; 99 if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */ 100 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 101 } 102 ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr); 103 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 104 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 105 #if defined(PETSC_USE_COMPLEX) 106 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 107 #endif 108 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 109 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 110 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 111 nv+cum,&pcbddc->adaptive_constraints_idxs, 112 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 113 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 114 115 maxneigs = 0; 116 cum = cum2 = cumarray = 0; 117 if (sub_schurs->is_Ej_com) { 118 const PetscInt *idxs; 119 120 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 121 for (cum=0;cum<nv;cum++) { 122 pcbddc->adaptive_constraints_n[cum] = 1; 123 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 124 pcbddc->adaptive_constraints_ptrs[cum] = cum; 125 pcbddc->adaptive_constraints_data[cum] = 1.0; 126 } 127 cum2 = cum; 128 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 129 } 130 131 if (mss) { /* multilevel */ 132 if (pcbddc->use_deluxe_scaling) { 133 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 134 } else { 135 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 136 } 137 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 138 } 139 140 for (i=0;i<sub_schurs->n_subs;i++) { 141 PetscInt j,subset_size; 142 143 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 144 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 145 const PetscInt *idxs; 146 PetscScalar one = 1.0,zero=0.0; 147 PetscBLASInt B_N; 148 149 /* S should be copied since we need it for deluxe scaling */ 150 if (sub_schurs->is_hermitian) { 151 PetscInt j; 152 for (j=0;j<subset_size;j++) { 153 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 154 } 155 for (j=0;j<subset_size;j++) { 156 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 157 } 158 } else { 159 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 160 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 161 } 162 /* is always this the right matrix? */ 163 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 164 165 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 166 /* St = Starray+cumarray; */ 167 168 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 169 PetscBLASInt B_itype = 1; 170 PetscBLASInt B_IL = 1, B_IU; 171 PetscScalar eps = -1.0; /* dlamch? */ 172 PetscInt nmin_s; 173 174 /* ask for eigenvalues lower than thresh */ 175 PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs); 176 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 177 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 178 #if defined(PETSC_USE_COMPLEX) 179 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 180 #else 181 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 182 #endif 183 ierr = PetscFPTrapPop();CHKERRQ(ierr); 184 if (B_ierr) { 185 if (B_ierr < 0 ) { 186 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 187 } else if (B_ierr <= B_N) { 188 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 189 } else { 190 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 191 } 192 } 193 194 if (B_neigs > nmax) { 195 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax); 196 B_neigs = nmax; 197 } 198 199 nmin_s = PetscMin(nmin,B_N); 200 if (B_neigs < nmin_s) { 201 PetscBLASInt B_neigs2; 202 203 B_IL = B_neigs + 1; 204 ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr); 205 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU); 206 if (sub_schurs->is_hermitian) { 207 PetscInt j; 208 for (j=0;j<subset_size;j++) { 209 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 210 } 211 for (j=0;j<subset_size;j++) { 212 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 213 } 214 } else { 215 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 216 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 217 } 218 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 219 #if defined(PETSC_USE_COMPLEX) 220 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 221 #else 222 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 223 #endif 224 ierr = PetscFPTrapPop();CHKERRQ(ierr); 225 B_neigs += B_neigs2; 226 } 227 if (B_ierr) { 228 if (B_ierr < 0 ) { 229 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 230 } else if (B_ierr <= B_N) { 231 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 232 } else { 233 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 234 } 235 } 236 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Got %d eigs\n",PetscGlobalRank,B_neigs); 237 for (j=0;j<B_neigs;j++) { 238 if (eigs[j] == 0.0) { 239 PetscPrintf(PETSC_COMM_SELF,"[%d] Inf\n",PetscGlobalRank); 240 } else { 241 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.6e\n",PetscGlobalRank,1.0/eigs[j]); 242 } 243 } 244 } else { 245 /* TODO */ 246 } 247 maxneigs = PetscMax(B_neigs,maxneigs); 248 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 249 250 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 251 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N)); 252 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 253 #if 0 254 PetscInt ii; 255 for (ii=0;ii<B_neigs;ii++) { 256 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Eigenvector %d/%d (%d)\n",PetscGlobalRank,ii,B_neigs,B_N); 257 for (j=0;j<B_N;j++) { 258 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.4e %1.4e\n",PetscGlobalRank,eigv[ii*B_N+j],Seigv[ii*B_N+j]); 259 } 260 } 261 #endif 262 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 263 for (j=0;j<B_neigs;j++) { 264 #if 0 265 { 266 PetscBLASInt Blas_N,Blas_one = 1.0; 267 PetscScalar norm; 268 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 269 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 270 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 271 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 272 } else { 273 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 274 } 275 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 276 } 277 #endif 278 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 279 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 280 cum2 += subset_size; 281 } 282 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 283 } 284 /* shift for next computation */ 285 cumarray += subset_size*subset_size; 286 } 287 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 288 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 289 290 if (mss) { 291 if (pcbddc->use_deluxe_scaling) { 292 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 293 } else { 294 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 295 } 296 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 297 } 298 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 299 #if defined(PETSC_USE_COMPLEX) 300 ierr = PetscFree(rwork);CHKERRQ(ierr); 301 #endif 302 if (pcbddc->dbg_flag) { 303 PetscInt maxneigs_r; 304 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 305 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 306 } 307 PetscFunctionReturn(0); 308 } 309 310 #undef __FUNCT__ 311 #define __FUNCT__ "PCBDDCSetUpSolvers" 312 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 313 { 314 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 315 PetscScalar *coarse_submat_vals; 316 PetscErrorCode ierr; 317 318 PetscFunctionBegin; 319 /* Setup local scatters R_to_B and (optionally) R_to_D */ 320 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 321 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 322 323 /* Setup local neumann solver ksp_R */ 324 /* PCBDDCSetUpLocalScatters should be called first! */ 325 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 326 327 /* Change global null space passed in by the user if change of basis has been requested */ 328 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 329 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 330 } 331 332 /* 333 Setup local correction and local part of coarse basis. 334 Gives back the dense local part of the coarse matrix in column major ordering 335 */ 336 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 337 338 /* Compute total number of coarse nodes and setup coarse solver */ 339 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 340 341 /* free */ 342 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 343 PetscFunctionReturn(0); 344 } 345 346 #undef __FUNCT__ 347 #define __FUNCT__ "PCBDDCResetCustomization" 348 PetscErrorCode PCBDDCResetCustomization(PC pc) 349 { 350 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 351 PetscErrorCode ierr; 352 353 PetscFunctionBegin; 354 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 355 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 356 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 357 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 358 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 359 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 360 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 361 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 362 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 363 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 364 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 365 PetscFunctionReturn(0); 366 } 367 368 #undef __FUNCT__ 369 #define __FUNCT__ "PCBDDCResetTopography" 370 PetscErrorCode PCBDDCResetTopography(PC pc) 371 { 372 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 373 PetscErrorCode ierr; 374 375 PetscFunctionBegin; 376 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 377 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 378 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 379 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 380 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 381 PetscFunctionReturn(0); 382 } 383 384 #undef __FUNCT__ 385 #define __FUNCT__ "PCBDDCResetSolvers" 386 PetscErrorCode PCBDDCResetSolvers(PC pc) 387 { 388 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 389 PetscScalar *array; 390 PetscErrorCode ierr; 391 392 PetscFunctionBegin; 393 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 394 if (pcbddc->coarse_phi_B) { 395 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 396 ierr = PetscFree(array);CHKERRQ(ierr); 397 } 398 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 399 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 400 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 401 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 402 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 403 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 404 if (pcbddc->local_auxmat2) { 405 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 406 ierr = PetscFree(array);CHKERRQ(ierr); 407 } 408 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 409 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 410 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 411 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 412 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 413 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 414 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 415 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 416 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 417 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 418 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 419 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 420 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 421 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 422 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 423 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 424 PetscFunctionReturn(0); 425 } 426 427 #undef __FUNCT__ 428 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 429 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 430 { 431 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 432 PC_IS *pcis = (PC_IS*)pc->data; 433 VecType impVecType; 434 PetscInt n_constraints,n_R,old_size; 435 PetscErrorCode ierr; 436 437 PetscFunctionBegin; 438 if (!pcbddc->ConstraintMatrix) { 439 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 440 } 441 /* get sizes */ 442 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 443 n_R = pcis->n-pcbddc->n_actual_vertices; 444 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 445 /* local work vectors (try to avoid unneeded work)*/ 446 /* R nodes */ 447 old_size = -1; 448 if (pcbddc->vec1_R) { 449 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 450 } 451 if (n_R != old_size) { 452 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 453 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 454 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 455 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 456 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 457 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 458 } 459 /* local primal dofs */ 460 old_size = -1; 461 if (pcbddc->vec1_P) { 462 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 463 } 464 if (pcbddc->local_primal_size != old_size) { 465 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 466 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 467 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 468 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 469 } 470 /* local explicit constraints */ 471 old_size = -1; 472 if (pcbddc->vec1_C) { 473 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 474 } 475 if (n_constraints && n_constraints != old_size) { 476 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 477 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 478 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 479 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 480 } 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "PCBDDCSetUpCorrection" 486 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 487 { 488 PetscErrorCode ierr; 489 /* pointers to pcis and pcbddc */ 490 PC_IS* pcis = (PC_IS*)pc->data; 491 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 492 /* submatrices of local problem */ 493 Mat A_RV,A_VR,A_VV; 494 /* submatrices of local coarse problem */ 495 Mat S_VV,S_CV,S_VC,S_CC; 496 /* working matrices */ 497 Mat C_CR; 498 /* additional working stuff */ 499 PC pc_R; 500 Mat F; 501 PetscBool isLU,isCHOL,isILU; 502 503 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 504 PetscScalar *work; 505 PetscInt *idx_V_B; 506 PetscInt n,n_vertices,n_constraints; 507 PetscInt i,n_R,n_D,n_B; 508 PetscBool unsymmetric_check; 509 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 510 MatType impMatType; 511 /* some shortcuts to scalars */ 512 PetscScalar one=1.0,m_one=-1.0; 513 514 PetscFunctionBegin; 515 /* get number of vertices (corners plus constraints with change of basis) 516 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 517 n_vertices = pcbddc->n_actual_vertices; 518 n_constraints = pcbddc->local_primal_size-n_vertices; 519 /* Set Non-overlapping dimensions */ 520 n_B = pcis->n_B; n_D = pcis->n - n_B; 521 n_R = pcis->n-n_vertices; 522 523 /* Set types for local objects needed by BDDC precondtioner */ 524 impMatType = MATSEQDENSE; 525 526 /* vertices in boundary numbering */ 527 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 528 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 529 if (i != n_vertices) { 530 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 531 } 532 533 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 534 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 535 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 536 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 537 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 538 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 539 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 540 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 541 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 542 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 543 544 unsymmetric_check = PETSC_FALSE; 545 /* allocate workspace */ 546 n = 0; 547 if (n_constraints) { 548 n += n_R*n_constraints; 549 } 550 if (n_vertices) { 551 n = PetscMax(2*n_R*n_vertices,n); 552 } 553 if (!pcbddc->issym) { 554 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 555 unsymmetric_check = PETSC_TRUE; 556 } 557 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 558 559 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 560 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 561 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 562 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 563 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 564 if (isLU || isILU || isCHOL) { 565 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 566 } else { 567 F = NULL; 568 } 569 570 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 571 if (n_constraints) { 572 Mat M1,M2,M3; 573 IS is_aux; 574 /* see if we can save some allocations */ 575 if (pcbddc->local_auxmat2) { 576 PetscInt on_R,on_constraints; 577 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 578 if (on_R != n_R || on_constraints != n_constraints) { 579 PetscScalar *marray; 580 581 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 582 ierr = PetscFree(marray);CHKERRQ(ierr); 583 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 584 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 585 } 586 } 587 /* auxiliary matrices */ 588 if (!pcbddc->local_auxmat2) { 589 PetscScalar *marray; 590 591 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 592 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 593 marray += n_R*n_constraints; 594 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 595 } 596 597 /* Extract constraints on R nodes: C_{CR} */ 598 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 599 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 600 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 601 602 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 603 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 604 for (i=0;i<n_constraints;i++) { 605 const PetscScalar *row_cmat_values; 606 const PetscInt *row_cmat_indices; 607 PetscInt size_of_constraint,j; 608 609 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 610 for (j=0;j<size_of_constraint;j++) { 611 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 612 } 613 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 614 } 615 if (F) { 616 Mat B; 617 618 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 619 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 620 ierr = MatDestroy(&B);CHKERRQ(ierr); 621 } else { 622 PetscScalar *xarray; 623 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 624 for (i=0;i<n_constraints;i++) { 625 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 626 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 627 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 628 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 629 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 630 } 631 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 632 } 633 634 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 635 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 636 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 637 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 638 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 639 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 640 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 641 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 642 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 643 ierr = MatDestroy(&M2);CHKERRQ(ierr); 644 ierr = MatDestroy(&M3);CHKERRQ(ierr); 645 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 646 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 647 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 648 ierr = MatDestroy(&M1);CHKERRQ(ierr); 649 } 650 /* Get submatrices from subdomain matrix */ 651 if (n_vertices) { 652 Mat newmat; 653 IS is_aux; 654 PetscInt ibs,mbs; 655 PetscBool issbaij; 656 657 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 658 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 659 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 660 if (ibs != mbs) { /* need to convert to SEQAIJ */ 661 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 662 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 663 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 664 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 665 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 666 } else { 667 /* this is safe */ 668 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 669 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 670 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 671 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 672 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 673 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 674 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 675 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 676 } else { 677 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 678 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 679 } 680 } 681 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 682 } 683 684 /* Matrix of coarse basis functions (local) */ 685 if (pcbddc->coarse_phi_B) { 686 PetscInt on_B,on_primal,on_D=n_D; 687 if (pcbddc->coarse_phi_D) { 688 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 689 } 690 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 691 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 692 PetscScalar *marray; 693 694 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 695 ierr = PetscFree(marray);CHKERRQ(ierr); 696 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 700 } 701 } 702 703 if (!pcbddc->coarse_phi_B) { 704 PetscScalar *marray; 705 706 n = n_B*pcbddc->local_primal_size; 707 if (pcbddc->switch_static || pcbddc->dbg_flag) { 708 n += n_D*pcbddc->local_primal_size; 709 } 710 if (!pcbddc->issym) { 711 n *= 2; 712 } 713 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 714 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 715 n = n_B*pcbddc->local_primal_size; 716 if (pcbddc->switch_static || pcbddc->dbg_flag) { 717 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 718 n += n_D*pcbddc->local_primal_size; 719 } 720 if (!pcbddc->issym) { 721 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 722 if (pcbddc->switch_static || pcbddc->dbg_flag) { 723 n = n_B*pcbddc->local_primal_size; 724 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 725 } 726 } else { 727 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 728 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 729 if (pcbddc->switch_static || pcbddc->dbg_flag) { 730 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 731 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 732 } 733 } 734 } 735 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 736 /* vertices */ 737 if (n_vertices) { 738 739 if (n_R) { 740 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 741 PetscBLASInt B_N,B_one = 1; 742 PetscScalar *x,*y; 743 744 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 745 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 746 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 747 if (F) { 748 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 749 } else { 750 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 751 for (i=0;i<n_vertices;i++) { 752 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 753 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 754 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 755 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 756 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 757 } 758 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 759 } 760 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 761 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 762 if (n_constraints) { 763 Mat B; 764 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 765 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 766 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 767 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 768 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 769 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 770 ierr = MatDestroy(&B);CHKERRQ(ierr); 771 } 772 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 773 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 774 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 775 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 776 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 777 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 778 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 779 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 780 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 781 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 782 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 783 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 784 } else { 785 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 786 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 787 } 788 /* coarse basis functions */ 789 for (i=0;i<n_vertices;i++) { 790 PetscScalar *y; 791 792 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 793 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 794 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 795 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 796 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 797 y[n_B*i+idx_V_B[i]] = 1.0; 798 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 799 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 800 801 if (pcbddc->switch_static || pcbddc->dbg_flag) { 802 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 803 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 804 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 805 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 806 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 807 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 808 } 809 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 810 } 811 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 812 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 813 } 814 815 if (n_constraints) { 816 Mat B; 817 818 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 819 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 820 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 821 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 822 if (n_vertices) { 823 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 824 } 825 ierr = MatDestroy(&B);CHKERRQ(ierr); 826 /* coarse basis functions */ 827 for (i=0;i<n_constraints;i++) { 828 PetscScalar *y; 829 830 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 831 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 832 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 833 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 834 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 835 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 836 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 837 838 if (pcbddc->switch_static || pcbddc->dbg_flag) { 839 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 840 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 841 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 842 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 844 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 845 } 846 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 847 } 848 } 849 850 /* compute other basis functions for non-symmetric problems */ 851 if (!pcbddc->issym) { 852 Mat B,X; 853 854 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 855 856 if (n_constraints) { 857 Mat S_CCT,B_C; 858 859 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 860 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 861 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 862 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 863 if (n_vertices) { 864 Mat B_V,S_VCT; 865 866 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 867 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 868 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 869 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 870 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 871 } 872 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 873 } 874 if (n_vertices && n_R) { 875 Mat A_VRT; 876 PetscBLASInt B_N,B_one = 1; 877 878 if (!n_constraints) { /* if there are no constraints, reset work */ 879 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 880 } 881 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 882 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 883 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 884 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 885 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 886 } 887 888 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 889 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 890 for (i=0;i<pcbddc->local_primal_size;i++) { 891 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 892 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 893 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 894 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 895 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 896 } 897 } else { 898 for (i=0;i<pcbddc->local_primal_size;i++) { 899 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 900 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 901 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 902 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 903 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 904 } 905 } 906 ierr = MatDestroy(&B);CHKERRQ(ierr); 907 /* coarse basis functions */ 908 for (i=0;i<pcbddc->local_primal_size;i++) { 909 PetscScalar *y; 910 911 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 912 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 913 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 914 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 if (i<n_vertices) { 917 y[n_B*i+idx_V_B[i]] = 1.0; 918 } 919 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 920 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 921 922 if (pcbddc->switch_static || pcbddc->dbg_flag) { 923 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 924 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 925 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 926 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 927 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 928 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 929 } 930 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 931 } 932 ierr = MatDestroy(&X);CHKERRQ(ierr); 933 } 934 935 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 936 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 937 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 938 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 939 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 940 /* Checking coarse_sub_mat and coarse basis functios */ 941 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 942 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 943 if (pcbddc->dbg_flag) { 944 Mat coarse_sub_mat; 945 Mat AUXMAT,TM1,TM2,TM3,TM4; 946 Mat coarse_phi_D,coarse_phi_B; 947 Mat coarse_psi_D,coarse_psi_B; 948 Mat A_II,A_BB,A_IB,A_BI; 949 MatType checkmattype=MATSEQAIJ; 950 PetscReal real_value; 951 952 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 953 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 954 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 955 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 956 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 957 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 958 if (unsymmetric_check) { 959 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 960 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 961 } 962 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 963 964 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 965 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 966 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 967 if (unsymmetric_check) { 968 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 969 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 970 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 971 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 972 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 973 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 974 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 975 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 976 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 977 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 978 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 979 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 980 } else { 981 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 982 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 983 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 984 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 985 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 986 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 987 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 988 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 989 } 990 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 991 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 992 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 993 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 994 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 995 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 996 997 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 998 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 999 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1000 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1001 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1002 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1003 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1004 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1005 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1006 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1007 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1008 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1009 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1010 if (unsymmetric_check) { 1011 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1012 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1013 } 1014 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1015 } 1016 1017 /* free memory */ 1018 ierr = PetscFree(work);CHKERRQ(ierr); 1019 if (n_vertices) { 1020 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1021 } 1022 if (n_constraints) { 1023 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1024 } 1025 /* get back data */ 1026 *coarse_submat_vals_n = coarse_submat_vals; 1027 PetscFunctionReturn(0); 1028 } 1029 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1032 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1033 { 1034 Mat *work_mat; 1035 IS isrow_s,iscol_s; 1036 PetscBool rsorted,csorted; 1037 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1038 PetscErrorCode ierr; 1039 1040 PetscFunctionBegin; 1041 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1042 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1043 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1044 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1045 1046 if (!rsorted) { 1047 const PetscInt *idxs; 1048 PetscInt *idxs_sorted,i; 1049 1050 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1051 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1052 for (i=0;i<rsize;i++) { 1053 idxs_perm_r[i] = i; 1054 } 1055 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1056 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1057 for (i=0;i<rsize;i++) { 1058 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1059 } 1060 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1061 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1062 } else { 1063 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1064 isrow_s = isrow; 1065 } 1066 1067 if (!csorted) { 1068 if (isrow == iscol) { 1069 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1070 iscol_s = isrow_s; 1071 } else { 1072 const PetscInt *idxs; 1073 PetscInt *idxs_sorted,i; 1074 1075 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1076 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1077 for (i=0;i<csize;i++) { 1078 idxs_perm_c[i] = i; 1079 } 1080 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1081 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1082 for (i=0;i<csize;i++) { 1083 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1084 } 1085 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1086 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1087 } 1088 } else { 1089 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1090 iscol_s = iscol; 1091 } 1092 1093 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1094 1095 if (!rsorted || !csorted) { 1096 Mat new_mat; 1097 IS is_perm_r,is_perm_c; 1098 1099 if (!rsorted) { 1100 PetscInt *idxs_r,i; 1101 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1102 for (i=0;i<rsize;i++) { 1103 idxs_r[idxs_perm_r[i]] = i; 1104 } 1105 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1106 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1107 } else { 1108 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1109 } 1110 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1111 1112 if (!csorted) { 1113 if (isrow_s == iscol_s) { 1114 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1115 is_perm_c = is_perm_r; 1116 } else { 1117 PetscInt *idxs_c,i; 1118 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1119 for (i=0;i<csize;i++) { 1120 idxs_c[idxs_perm_c[i]] = i; 1121 } 1122 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1123 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1124 } 1125 } else { 1126 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1127 } 1128 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1129 1130 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1131 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1132 work_mat[0] = new_mat; 1133 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1134 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1135 } 1136 1137 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1138 *B = work_mat[0]; 1139 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1140 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1141 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1147 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1148 { 1149 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1150 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1151 Mat new_mat; 1152 IS is_local,is_global; 1153 PetscInt local_size; 1154 PetscBool isseqaij; 1155 PetscErrorCode ierr; 1156 1157 PetscFunctionBegin; 1158 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1159 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1160 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1161 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1162 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1163 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1164 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1165 1166 /* check */ 1167 if (pcbddc->dbg_flag) { 1168 Vec x,x_change; 1169 PetscReal error; 1170 1171 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1172 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1173 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1174 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1175 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1176 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1177 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1178 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1179 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1180 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1181 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1182 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1183 ierr = VecDestroy(&x);CHKERRQ(ierr); 1184 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1185 } 1186 1187 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1188 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1189 if (isseqaij) { 1190 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1191 } else { 1192 Mat work_mat; 1193 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1194 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1195 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1196 } 1197 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1198 /* 1199 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1200 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1201 */ 1202 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1203 PetscFunctionReturn(0); 1204 } 1205 1206 #undef __FUNCT__ 1207 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1208 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1209 { 1210 PC_IS* pcis = (PC_IS*)(pc->data); 1211 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1212 IS is_aux1,is_aux2; 1213 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1214 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1215 PetscInt vbs,bs; 1216 PetscBT bitmask; 1217 PetscErrorCode ierr; 1218 1219 PetscFunctionBegin; 1220 /* 1221 No need to setup local scatters if 1222 - primal space is unchanged 1223 AND 1224 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1225 AND 1226 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1227 */ 1228 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1229 PetscFunctionReturn(0); 1230 } 1231 /* destroy old objects */ 1232 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1233 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1234 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1235 /* Set Non-overlapping dimensions */ 1236 n_B = pcis->n_B; n_D = pcis->n - n_B; 1237 n_vertices = pcbddc->n_actual_vertices; 1238 /* create auxiliary bitmask */ 1239 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1240 for (i=0;i<n_vertices;i++) { 1241 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1242 } 1243 1244 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1245 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1246 for (i=0, n_R=0; i<pcis->n; i++) { 1247 if (!PetscBTLookup(bitmask,i)) { 1248 idx_R_local[n_R] = i; 1249 n_R++; 1250 } 1251 } 1252 1253 /* Block code */ 1254 vbs = 1; 1255 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1256 if (bs>1 && !(n_vertices%bs)) { 1257 PetscBool is_blocked = PETSC_TRUE; 1258 PetscInt *vary; 1259 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1260 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1261 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1262 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1263 for (i=0; i<n_vertices; i++) { 1264 if (vary[i]!=0 && vary[i]!=bs) { 1265 is_blocked = PETSC_FALSE; 1266 break; 1267 } 1268 } 1269 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1270 vbs = bs; 1271 for (i=0;i<n_R/vbs;i++) { 1272 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1273 } 1274 } 1275 ierr = PetscFree(vary);CHKERRQ(ierr); 1276 } 1277 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1278 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1279 1280 /* print some info if requested */ 1281 if (pcbddc->dbg_flag) { 1282 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1283 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1284 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1285 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1287 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1288 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1289 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1290 } 1291 1292 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1293 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1294 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1295 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1296 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1297 for (i=0; i<n_D; i++) { 1298 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1299 } 1300 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1301 for (i=0, j=0; i<n_R; i++) { 1302 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1303 aux_array1[j++] = i; 1304 } 1305 } 1306 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1307 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1308 for (i=0, j=0; i<n_B; i++) { 1309 if (!PetscBTLookup(bitmask,is_indices[i])) { 1310 aux_array2[j++] = i; 1311 } 1312 } 1313 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1314 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1315 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1316 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1317 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1318 1319 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1320 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1321 for (i=0, j=0; i<n_R; i++) { 1322 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1323 aux_array1[j++] = i; 1324 } 1325 } 1326 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1327 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1328 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1329 } 1330 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1331 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } 1334 1335 1336 #undef __FUNCT__ 1337 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1338 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1339 { 1340 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1341 PC_IS *pcis = (PC_IS*)pc->data; 1342 PC pc_temp; 1343 Mat A_RR; 1344 MatReuse reuse; 1345 PetscScalar m_one = -1.0; 1346 PetscReal value; 1347 PetscInt n_D,n_R,ibs,mbs; 1348 PetscBool use_exact,use_exact_reduced,issbaij; 1349 PetscErrorCode ierr; 1350 /* prefixes stuff */ 1351 char dir_prefix[256],neu_prefix[256],str_level[16]; 1352 size_t len; 1353 1354 PetscFunctionBegin; 1355 1356 /* compute prefixes */ 1357 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1358 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1359 if (!pcbddc->current_level) { 1360 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1361 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1362 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1363 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1364 } else { 1365 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1366 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1367 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1368 len -= 15; /* remove "pc_bddc_coarse_" */ 1369 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1370 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1371 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1372 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1373 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1374 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1375 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1376 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1377 } 1378 1379 /* DIRICHLET PROBLEM */ 1380 if (dirichlet) { 1381 /* Matrix for Dirichlet problem is pcis->A_II */ 1382 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1383 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1384 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1385 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1386 /* default */ 1387 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1388 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1389 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1390 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1391 if (issbaij) { 1392 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1393 } else { 1394 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1395 } 1396 /* Allow user's customization */ 1397 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1398 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1399 } 1400 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1401 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1402 if (!n_D) { 1403 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1404 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1405 } 1406 /* Set Up KSP for Dirichlet problem of BDDC */ 1407 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1408 /* set ksp_D into pcis data */ 1409 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1410 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1411 pcis->ksp_D = pcbddc->ksp_D; 1412 } 1413 1414 /* NEUMANN PROBLEM */ 1415 A_RR = 0; 1416 if (neumann) { 1417 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1418 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1419 if (pcbddc->ksp_R) { /* already created ksp */ 1420 PetscInt nn_R; 1421 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1422 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1423 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1424 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1425 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1426 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1427 reuse = MAT_INITIAL_MATRIX; 1428 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1429 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1430 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1431 reuse = MAT_INITIAL_MATRIX; 1432 } else { /* safe to reuse the matrix */ 1433 reuse = MAT_REUSE_MATRIX; 1434 } 1435 } 1436 /* last check */ 1437 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1438 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1439 reuse = MAT_INITIAL_MATRIX; 1440 } 1441 } else { /* first time, so we need to create the matrix */ 1442 reuse = MAT_INITIAL_MATRIX; 1443 } 1444 /* extract A_RR */ 1445 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1446 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1447 if (ibs != mbs) { 1448 Mat newmat; 1449 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1450 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1451 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1452 } else { 1453 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1454 } 1455 if (!pcbddc->ksp_R) { /* create object if not present */ 1456 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1457 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1458 /* default */ 1459 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1460 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1461 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1462 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1463 if (issbaij) { 1464 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1465 } else { 1466 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1467 } 1468 /* Allow user's customization */ 1469 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1470 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1471 } 1472 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1473 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1474 if (!n_R) { 1475 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1476 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1477 } 1478 /* Set Up KSP for Neumann problem of BDDC */ 1479 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1480 } 1481 1482 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1483 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1484 if (pcbddc->dbg_flag) { 1485 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1486 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1487 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1488 } 1489 if (dirichlet) { /* Dirichlet */ 1490 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1491 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1492 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1493 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1494 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1495 /* need to be adapted? */ 1496 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1497 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1498 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1499 /* print info */ 1500 if (pcbddc->dbg_flag) { 1501 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1502 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1503 } 1504 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1505 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1506 } 1507 } 1508 if (neumann) { /* Neumann */ 1509 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1510 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1511 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1512 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1513 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1514 /* need to be adapted? */ 1515 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1516 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1517 /* print info */ 1518 if (pcbddc->dbg_flag) { 1519 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1520 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1521 } 1522 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1523 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1524 } 1525 } 1526 } 1527 /* free Neumann problem's matrix */ 1528 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1529 PetscFunctionReturn(0); 1530 } 1531 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1534 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1535 { 1536 PetscErrorCode ierr; 1537 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1538 1539 PetscFunctionBegin; 1540 if (applytranspose) { 1541 if (pcbddc->local_auxmat1) { 1542 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1543 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1544 } 1545 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1546 } else { 1547 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1548 if (pcbddc->local_auxmat1) { 1549 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1550 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1551 } 1552 } 1553 PetscFunctionReturn(0); 1554 } 1555 1556 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1559 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1560 { 1561 PetscErrorCode ierr; 1562 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1563 PC_IS* pcis = (PC_IS*) (pc->data); 1564 const PetscScalar zero = 0.0; 1565 1566 PetscFunctionBegin; 1567 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1568 if (applytranspose) { 1569 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1570 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1571 } else { 1572 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1573 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1574 } 1575 /* start communications from local primal nodes to rhs of coarse solver */ 1576 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1577 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1578 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1579 1580 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1581 /* TODO remove null space when doing multilevel */ 1582 if (pcbddc->coarse_ksp) { 1583 if (applytranspose) { 1584 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1585 } else { 1586 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1587 } 1588 } 1589 1590 /* Local solution on R nodes */ 1591 if (pcis->n) { 1592 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1593 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1594 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1595 if (pcbddc->switch_static) { 1596 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1597 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1598 } 1599 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1600 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1601 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1602 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1603 if (pcbddc->switch_static) { 1604 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1605 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1606 } 1607 } 1608 1609 /* communications from coarse sol to local primal nodes */ 1610 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1611 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1612 1613 /* Sum contributions from two levels */ 1614 if (applytranspose) { 1615 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1616 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1617 } else { 1618 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1619 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1620 } 1621 PetscFunctionReturn(0); 1622 } 1623 1624 #undef __FUNCT__ 1625 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1626 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1627 { 1628 PetscErrorCode ierr; 1629 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1630 PetscScalar *array; 1631 Vec from,to; 1632 1633 PetscFunctionBegin; 1634 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1635 from = pcbddc->coarse_vec; 1636 to = pcbddc->vec1_P; 1637 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1638 Vec tvec; 1639 1640 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1641 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1642 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1643 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1644 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1645 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1646 } 1647 } else { /* from local to global -> put data in coarse right hand side */ 1648 from = pcbddc->vec1_P; 1649 to = pcbddc->coarse_vec; 1650 } 1651 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1652 PetscFunctionReturn(0); 1653 } 1654 1655 #undef __FUNCT__ 1656 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1657 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1658 { 1659 PetscErrorCode ierr; 1660 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1661 PetscScalar *array; 1662 Vec from,to; 1663 1664 PetscFunctionBegin; 1665 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1666 from = pcbddc->coarse_vec; 1667 to = pcbddc->vec1_P; 1668 } else { /* from local to global -> put data in coarse right hand side */ 1669 from = pcbddc->vec1_P; 1670 to = pcbddc->coarse_vec; 1671 } 1672 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1673 if (smode == SCATTER_FORWARD) { 1674 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1675 Vec tvec; 1676 1677 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1678 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1679 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1680 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1681 } 1682 } else { 1683 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1684 ierr = VecResetArray(from);CHKERRQ(ierr); 1685 } 1686 } 1687 PetscFunctionReturn(0); 1688 } 1689 1690 /* uncomment for testing purposes */ 1691 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1692 #undef __FUNCT__ 1693 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1694 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1695 { 1696 PetscErrorCode ierr; 1697 PC_IS* pcis = (PC_IS*)(pc->data); 1698 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1699 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1700 /* one and zero */ 1701 PetscScalar one=1.0,zero=0.0; 1702 /* space to store constraints and their local indices */ 1703 PetscScalar *temp_quadrature_constraint; 1704 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1705 /* iterators */ 1706 PetscInt i,j,k,total_counts,temp_start_ptr; 1707 /* BLAS integers */ 1708 PetscBLASInt lwork,lierr; 1709 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1710 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1711 /* reuse */ 1712 PetscInt olocal_primal_size; 1713 PetscInt *oprimal_indices_local_idxs; 1714 /* change of basis */ 1715 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1716 PetscBool boolforchange,qr_needed; 1717 PetscBT touched,change_basis,qr_needed_idx; 1718 /* auxiliary stuff */ 1719 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1720 PetscInt ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL; 1721 PetscScalar *temp_quadrature_constraint_work=NULL; 1722 /* some quantities */ 1723 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1724 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1725 1726 PetscFunctionBegin; 1727 /* Destroy Mat objects computed previously */ 1728 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1729 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1730 1731 /* print some info */ 1732 if (pcbddc->dbg_flag) { 1733 IS vertices; 1734 PetscInt nv,nedges,nfaces; 1735 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1736 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1737 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1738 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1739 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1740 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1741 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_edges);CHKERRQ(ierr); 1742 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nedges,pcbddc->use_faces);CHKERRQ(ierr); 1743 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1744 } 1745 1746 if (!pcbddc->adaptive_selection) { 1747 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1748 MatNullSpace nearnullsp; 1749 const Vec *nearnullvecs; 1750 Vec *localnearnullsp; 1751 PetscScalar *array; 1752 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1753 PetscBool nnsp_has_cnst; 1754 /* LAPACK working arrays for SVD or POD */ 1755 PetscBool skip_lapack; 1756 PetscScalar *work; 1757 PetscReal *singular_vals; 1758 #if defined(PETSC_USE_COMPLEX) 1759 PetscReal *rwork; 1760 #endif 1761 #if defined(PETSC_MISSING_LAPACK_GESVD) 1762 PetscScalar *temp_basis,*correlation_mat; 1763 #else 1764 PetscBLASInt dummy_int; 1765 PetscScalar dummy_scalar; 1766 #endif 1767 1768 /* Get index sets for faces, edges and vertices from graph */ 1769 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1770 /* free unneeded index sets */ 1771 if (!pcbddc->use_vertices) { 1772 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1773 } 1774 if (!pcbddc->use_edges) { 1775 for (i=0;i<n_ISForEdges;i++) { 1776 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1777 } 1778 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1779 n_ISForEdges = 0; 1780 } 1781 if (!pcbddc->use_faces) { 1782 for (i=0;i<n_ISForFaces;i++) { 1783 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1784 } 1785 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1786 n_ISForFaces = 0; 1787 } 1788 /* HACKS (the following two blocks of code) */ 1789 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1790 pcbddc->use_change_of_basis = PETSC_TRUE; 1791 if (!ISForEdges) { 1792 pcbddc->use_change_on_faces = PETSC_TRUE; 1793 } 1794 } 1795 if (pcbddc->NullSpace) { 1796 /* use_change_of_basis should be consistent among processors */ 1797 PetscBool tbool[2],gbool[2]; 1798 tbool [0] = pcbddc->use_change_of_basis; 1799 tbool [1] = pcbddc->use_change_on_faces; 1800 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1801 pcbddc->use_change_of_basis = gbool[0]; 1802 pcbddc->use_change_on_faces = gbool[1]; 1803 } 1804 1805 /* check if near null space is attached to global mat */ 1806 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1807 if (nearnullsp) { 1808 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1809 /* remove any stored info */ 1810 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1811 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1812 /* store information for BDDC solver reuse */ 1813 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1814 pcbddc->onearnullspace = nearnullsp; 1815 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1816 for (i=0;i<nnsp_size;i++) { 1817 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1818 } 1819 } else { /* if near null space is not provided BDDC uses constants by default */ 1820 nnsp_size = 0; 1821 nnsp_has_cnst = PETSC_TRUE; 1822 } 1823 /* get max number of constraints on a single cc */ 1824 max_constraints = nnsp_size; 1825 if (nnsp_has_cnst) max_constraints++; 1826 1827 /* 1828 Evaluate maximum storage size needed by the procedure 1829 - temp_indices will contain start index of each constraint stored as follows 1830 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1831 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1832 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1833 */ 1834 total_counts = n_ISForFaces+n_ISForEdges; 1835 total_counts *= max_constraints; 1836 n_vertices = 0; 1837 if (ISForVertices) { 1838 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1839 } 1840 total_counts += n_vertices; 1841 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1842 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1843 total_counts = 0; 1844 max_size_of_constraint = 0; 1845 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1846 if (i<n_ISForEdges) { 1847 used_IS = &ISForEdges[i]; 1848 } else { 1849 used_IS = &ISForFaces[i-n_ISForEdges]; 1850 } 1851 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1852 total_counts += j; 1853 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1854 } 1855 total_counts *= max_constraints; 1856 total_counts += n_vertices; 1857 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1858 /* get local part of global near null space vectors */ 1859 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1860 for (k=0;k<nnsp_size;k++) { 1861 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1862 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1863 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1864 } 1865 1866 /* whether or not to skip lapack calls */ 1867 skip_lapack = PETSC_TRUE; 1868 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1869 1870 /* allocate some auxiliary stuff */ 1871 if (!skip_lapack || pcbddc->use_qr_single) { 1872 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1873 } 1874 1875 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1876 if (!skip_lapack) { 1877 PetscScalar temp_work; 1878 1879 #if defined(PETSC_MISSING_LAPACK_GESVD) 1880 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1881 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1882 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1883 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1884 #if defined(PETSC_USE_COMPLEX) 1885 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1886 #endif 1887 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1888 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1889 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1890 lwork = -1; 1891 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1892 #if !defined(PETSC_USE_COMPLEX) 1893 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1894 #else 1895 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1896 #endif 1897 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1898 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1899 #else /* on missing GESVD */ 1900 /* SVD */ 1901 PetscInt max_n,min_n; 1902 max_n = max_size_of_constraint; 1903 min_n = max_constraints; 1904 if (max_size_of_constraint < max_constraints) { 1905 min_n = max_size_of_constraint; 1906 max_n = max_constraints; 1907 } 1908 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1909 #if defined(PETSC_USE_COMPLEX) 1910 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1911 #endif 1912 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1913 lwork = -1; 1914 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1915 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1916 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1917 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1918 #if !defined(PETSC_USE_COMPLEX) 1919 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 1920 #else 1921 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 1922 #endif 1923 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1924 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1925 #endif /* on missing GESVD */ 1926 /* Allocate optimal workspace */ 1927 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1928 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1929 } 1930 /* Now we can loop on constraining sets */ 1931 total_counts = 0; 1932 temp_indices[0] = 0; 1933 /* vertices */ 1934 if (ISForVertices) { 1935 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1936 if (nnsp_has_cnst) { /* consider all vertices */ 1937 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1938 for (i=0;i<n_vertices;i++) { 1939 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1940 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1941 total_counts++; 1942 } 1943 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1944 PetscBool used_vertex; 1945 for (i=0;i<n_vertices;i++) { 1946 used_vertex = PETSC_FALSE; 1947 k = 0; 1948 while (!used_vertex && k<nnsp_size) { 1949 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1950 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1951 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1952 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1953 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1954 total_counts++; 1955 used_vertex = PETSC_TRUE; 1956 } 1957 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1958 k++; 1959 } 1960 } 1961 } 1962 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1963 n_vertices = total_counts; 1964 } 1965 1966 /* edges and faces */ 1967 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1968 if (ncc<n_ISForEdges) { 1969 used_IS = &ISForEdges[ncc]; 1970 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1971 } else { 1972 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1973 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1974 } 1975 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1976 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1977 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1978 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1979 /* change of basis should not be performed on local periodic nodes */ 1980 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1981 if (nnsp_has_cnst) { 1982 PetscScalar quad_value; 1983 temp_constraints++; 1984 if (!pcbddc->use_nnsp_true) { 1985 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1986 } else { 1987 quad_value = 1.0; 1988 } 1989 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1990 for (j=0;j<size_of_constraint;j++) { 1991 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1992 } 1993 /* sort by global ordering if using lapack subroutines (not needed!) */ 1994 if (!skip_lapack || pcbddc->use_qr_single) { 1995 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1996 for (j=0;j<size_of_constraint;j++) { 1997 permutation[j]=j; 1998 } 1999 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2000 for (j=0;j<size_of_constraint;j++) { 2001 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 2002 } 2003 for (j=0;j<size_of_constraint;j++) { 2004 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2005 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2006 } 2007 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2008 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2009 } 2010 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2011 total_counts++; 2012 } 2013 for (k=0;k<nnsp_size;k++) { 2014 PetscReal real_value; 2015 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2016 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2017 for (j=0;j<size_of_constraint;j++) { 2018 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2019 } 2020 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2021 /* check if array is null on the connected component */ 2022 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2023 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2024 if (real_value > 0.0) { /* keep indices and values */ 2025 /* sort by global ordering if using lapack subroutines */ 2026 if (!skip_lapack || pcbddc->use_qr_single) { 2027 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2028 for (j=0;j<size_of_constraint;j++) { 2029 permutation[j]=j; 2030 } 2031 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2032 for (j=0;j<size_of_constraint;j++) { 2033 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2034 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2035 } 2036 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2037 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2038 } 2039 temp_constraints++; 2040 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2041 total_counts++; 2042 } 2043 } 2044 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2045 valid_constraints = temp_constraints; 2046 if (!pcbddc->use_nnsp_true && temp_constraints) { 2047 if (temp_constraints == 1) { /* just normalize the constraint */ 2048 PetscScalar norm; 2049 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2050 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2051 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2052 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2053 } else { /* perform SVD */ 2054 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2055 2056 #if defined(PETSC_MISSING_LAPACK_GESVD) 2057 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2058 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2059 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2060 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2061 from that computed using LAPACKgesvd 2062 -> This is due to a different computation of eigenvectors in LAPACKheev 2063 -> The quality of the POD-computed basis will be the same */ 2064 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2065 /* Store upper triangular part of correlation matrix */ 2066 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2067 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2068 for (j=0;j<temp_constraints;j++) { 2069 for (k=0;k<j+1;k++) { 2070 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one)); 2071 } 2072 } 2073 /* compute eigenvalues and eigenvectors of correlation matrix */ 2074 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2075 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2076 #if !defined(PETSC_USE_COMPLEX) 2077 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2078 #else 2079 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2080 #endif 2081 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2082 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2083 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2084 j = 0; 2085 while (j < temp_constraints && singular_vals[j] < tol) j++; 2086 total_counts = total_counts-j; 2087 valid_constraints = temp_constraints-j; 2088 /* scale and copy POD basis into used quadrature memory */ 2089 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2090 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2091 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2092 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2093 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2094 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2095 if (j<temp_constraints) { 2096 PetscInt ii; 2097 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2098 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2099 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2100 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2101 for (k=0;k<temp_constraints-j;k++) { 2102 for (ii=0;ii<size_of_constraint;ii++) { 2103 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2104 } 2105 } 2106 } 2107 #else /* on missing GESVD */ 2108 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2109 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2110 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2111 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2112 #if !defined(PETSC_USE_COMPLEX) 2113 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2114 #else 2115 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2116 #endif 2117 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2118 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2119 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2120 k = temp_constraints; 2121 if (k > size_of_constraint) k = size_of_constraint; 2122 j = 0; 2123 while (j < k && singular_vals[k-j-1] < tol) j++; 2124 valid_constraints = k-j; 2125 total_counts = total_counts-temp_constraints+valid_constraints; 2126 #endif /* on missing GESVD */ 2127 } 2128 } 2129 /* setting change_of_basis flag is safe now */ 2130 if (boolforchange) { 2131 for (j=0;j<valid_constraints;j++) { 2132 PetscBTSet(change_basis,total_counts-j-1); 2133 } 2134 } 2135 } 2136 /* free workspace */ 2137 if (!skip_lapack || pcbddc->use_qr_single) { 2138 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2139 } 2140 if (!skip_lapack) { 2141 ierr = PetscFree(work);CHKERRQ(ierr); 2142 #if defined(PETSC_USE_COMPLEX) 2143 ierr = PetscFree(rwork);CHKERRQ(ierr); 2144 #endif 2145 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2146 #if defined(PETSC_MISSING_LAPACK_GESVD) 2147 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2148 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2149 #endif 2150 } 2151 for (k=0;k<nnsp_size;k++) { 2152 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2153 } 2154 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2155 /* free index sets of faces, edges and vertices */ 2156 for (i=0;i<n_ISForFaces;i++) { 2157 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2158 } 2159 if (n_ISForFaces) { 2160 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2161 } 2162 for (i=0;i<n_ISForEdges;i++) { 2163 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2164 } 2165 if (n_ISForEdges) { 2166 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2167 } 2168 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2169 } else { 2170 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2171 PetscInt cum = 0; 2172 2173 total_counts = 0; 2174 n_vertices = 0; 2175 if (sub_schurs->is_Ej_com) { 2176 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2177 } 2178 max_constraints = 0; 2179 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2180 total_counts += pcbddc->adaptive_constraints_n[i]; 2181 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2182 } 2183 temp_indices = pcbddc->adaptive_constraints_ptrs; 2184 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2185 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2186 2187 #if 0 2188 printf("Found %d totals\n",total_counts); 2189 for (i=0;i<total_counts;i++) { 2190 printf("const %d, start %d",i,temp_indices[i]); 2191 printf(" end %d:\n",temp_indices[i+1]); 2192 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2193 printf(" idxs %d",temp_indices_to_constraint[j]); 2194 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2195 } 2196 } 2197 for (i=0;i<n_vertices;i++) { 2198 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]); 2199 } 2200 for (i=0;i<sub_schurs->n_subs;i++) { 2201 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2202 } 2203 #endif 2204 2205 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2206 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2207 /* Change of basis */ 2208 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2209 if (pcbddc->use_change_of_basis) { 2210 cum = n_vertices; 2211 for (i=0;i<sub_schurs->n_subs;i++) { 2212 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2213 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2214 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2215 } 2216 } 2217 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2218 } 2219 } 2220 } 2221 2222 /* map temp_indices_to_constraint in boundary numbering */ 2223 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2224 if (i != temp_indices[total_counts]) { 2225 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2226 } 2227 2228 /* set quantities in pcbddc data structure and store previous primal size */ 2229 /* n_vertices defines the number of subdomain corners in the primal space */ 2230 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2231 olocal_primal_size = pcbddc->local_primal_size; 2232 pcbddc->local_primal_size = total_counts; 2233 pcbddc->n_vertices = n_vertices; 2234 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2235 2236 /* Create constraint matrix */ 2237 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2238 /* so we need to set it up properly either with or without change of basis */ 2239 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2240 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2241 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2242 /* array to compute a local numbering of constraints : vertices first then constraints */ 2243 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2244 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2245 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2246 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2247 /* auxiliary stuff for basis change */ 2248 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2249 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2250 2251 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2252 total_primal_vertices=0; 2253 for (i=0;i<pcbddc->local_primal_size;i++) { 2254 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2255 if (size_of_constraint == 1) { 2256 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2257 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2258 aux_primal_minloc[total_primal_vertices]=0; 2259 total_primal_vertices++; 2260 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2261 PetscInt min_loc,min_index; 2262 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2263 /* find first untouched local node */ 2264 k = 0; 2265 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2266 min_index = global_indices[k]; 2267 min_loc = k; 2268 /* search the minimum among global nodes already untouched on the cc */ 2269 for (k=1;k<size_of_constraint;k++) { 2270 /* there can be more than one constraint on a single connected component */ 2271 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2272 min_index = global_indices[k]; 2273 min_loc = k; 2274 } 2275 } 2276 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2277 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2278 aux_primal_minloc[total_primal_vertices]=min_loc; 2279 total_primal_vertices++; 2280 } 2281 } 2282 /* determine if a QR strategy is needed for change of basis */ 2283 qr_needed = PETSC_FALSE; 2284 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2285 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2286 if (PetscBTLookup(change_basis,i)) { 2287 if (!pcbddc->use_qr_single) { 2288 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2289 j = 0; 2290 for (k=0;k<size_of_constraint;k++) { 2291 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2292 j++; 2293 } 2294 } 2295 /* found more than one primal dof on the cc */ 2296 if (j > 1) { 2297 PetscBTSet(qr_needed_idx,i); 2298 qr_needed = PETSC_TRUE; 2299 } 2300 } else { 2301 PetscBTSet(qr_needed_idx,i); 2302 qr_needed = PETSC_TRUE; 2303 } 2304 } 2305 } 2306 /* free workspace */ 2307 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2308 2309 /* permute indices in order to have a sorted set of vertices */ 2310 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2311 2312 /* nonzero structure of constraint matrix */ 2313 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2314 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2315 j=total_primal_vertices; 2316 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2317 if (!PetscBTLookup(change_basis,i)) { 2318 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2319 j++; 2320 } 2321 } 2322 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2323 ierr = PetscFree(nnz);CHKERRQ(ierr); 2324 /* set values in constraint matrix */ 2325 for (i=0;i<total_primal_vertices;i++) { 2326 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2327 } 2328 total_counts = total_primal_vertices; 2329 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2330 if (!PetscBTLookup(change_basis,i)) { 2331 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2332 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2333 total_counts++; 2334 } 2335 } 2336 /* assembling */ 2337 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2338 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2339 /* 2340 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2341 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2342 */ 2343 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2344 if (pcbddc->use_change_of_basis) { 2345 /* dual and primal dofs on a single cc */ 2346 PetscInt dual_dofs,primal_dofs; 2347 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2348 PetscInt primal_counter; 2349 /* working stuff for GEQRF */ 2350 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2351 PetscBLASInt lqr_work; 2352 /* working stuff for UNGQR */ 2353 PetscScalar *gqr_work,lgqr_work_t; 2354 PetscBLASInt lgqr_work; 2355 /* working stuff for TRTRS */ 2356 PetscScalar *trs_rhs; 2357 PetscBLASInt Blas_NRHS; 2358 /* pointers for values insertion into change of basis matrix */ 2359 PetscInt *start_rows,*start_cols; 2360 PetscScalar *start_vals; 2361 /* working stuff for values insertion */ 2362 PetscBT is_primal; 2363 /* matrix sizes */ 2364 PetscInt global_size,local_size; 2365 /* temporary change of basis */ 2366 Mat localChangeOfBasisMatrix; 2367 /* extra space for debugging */ 2368 PetscScalar *dbg_work; 2369 2370 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2371 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2372 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2373 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2374 /* nonzeros for local mat */ 2375 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2376 for (i=0;i<pcis->n;i++) nnz[i]=1; 2377 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2378 if (PetscBTLookup(change_basis,i)) { 2379 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2380 if (PetscBTLookup(qr_needed_idx,i)) { 2381 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2382 } else { 2383 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2; 2384 /* get local primal index on the cc */ 2385 j = 0; 2386 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2387 nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2388 } 2389 } 2390 } 2391 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2392 ierr = PetscFree(nnz);CHKERRQ(ierr); 2393 /* Set initial identity in the matrix */ 2394 for (i=0;i<pcis->n;i++) { 2395 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2396 } 2397 2398 if (pcbddc->dbg_flag) { 2399 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2400 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2401 } 2402 2403 2404 /* Now we loop on the constraints which need a change of basis */ 2405 /* 2406 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2407 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2408 2409 Basic blocks of change of basis matrix T computed by 2410 2411 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2412 2413 | 1 0 ... 0 s_1/S | 2414 | 0 1 ... 0 s_2/S | 2415 | ... | 2416 | 0 ... 1 s_{n-1}/S | 2417 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2418 2419 with S = \sum_{i=1}^n s_i^2 2420 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2421 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2422 2423 - QR decomposition of constraints otherwise 2424 */ 2425 if (qr_needed) { 2426 /* space to store Q */ 2427 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2428 /* first we issue queries for optimal work */ 2429 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2430 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2431 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2432 lqr_work = -1; 2433 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2434 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2435 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2436 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2437 lgqr_work = -1; 2438 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2439 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2440 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2441 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2442 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2443 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2444 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2445 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2446 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2447 /* array to store scaling factors for reflectors */ 2448 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2449 /* array to store rhs and solution of triangular solver */ 2450 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2451 /* allocating workspace for check */ 2452 if (pcbddc->dbg_flag) { 2453 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2454 } 2455 } 2456 /* array to store whether a node is primal or not */ 2457 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2458 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2459 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2460 if (i != total_primal_vertices) { 2461 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2462 } 2463 for (i=0;i<total_primal_vertices;i++) { 2464 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2465 } 2466 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2467 2468 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2469 /* -> using implicit ordering contained in temp_indices data */ 2470 total_counts = pcbddc->n_vertices; 2471 primal_counter = total_counts; 2472 while (total_counts<pcbddc->local_primal_size) { 2473 primal_dofs = 1; 2474 if (PetscBTLookup(change_basis,total_counts)) { 2475 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2476 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) { 2477 primal_dofs++; 2478 } 2479 /* get constraint info */ 2480 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2481 dual_dofs = size_of_constraint-primal_dofs; 2482 2483 if (pcbddc->dbg_flag) { 2484 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2485 } 2486 2487 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2488 2489 /* copy quadrature constraints for change of basis check */ 2490 if (pcbddc->dbg_flag) { 2491 ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2492 } 2493 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2494 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2495 2496 /* compute QR decomposition of constraints */ 2497 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2498 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2499 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2500 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2501 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2502 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2503 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2504 2505 /* explictly compute R^-T */ 2506 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2507 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2508 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2509 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2510 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2511 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2512 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2513 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2514 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2515 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2516 2517 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2518 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2519 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2520 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2521 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2522 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2523 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2524 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2525 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2526 2527 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2528 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2529 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2530 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2531 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2532 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2533 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2534 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2535 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2536 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2537 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2538 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2539 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2540 2541 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2542 start_rows = &temp_indices_to_constraint[temp_indices[total_counts]]; 2543 /* insert cols for primal dofs */ 2544 for (j=0;j<primal_dofs;j++) { 2545 start_vals = &qr_basis[j*size_of_constraint]; 2546 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2547 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2548 } 2549 /* insert cols for dual dofs */ 2550 for (j=0,k=0;j<dual_dofs;k++) { 2551 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2552 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2553 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k]; 2554 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2555 j++; 2556 } 2557 } 2558 2559 /* check change of basis */ 2560 if (pcbddc->dbg_flag) { 2561 PetscInt ii,jj; 2562 PetscBool valid_qr=PETSC_TRUE; 2563 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2564 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2565 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2566 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2567 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2568 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2569 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2570 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2571 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2572 for (jj=0;jj<size_of_constraint;jj++) { 2573 for (ii=0;ii<primal_dofs;ii++) { 2574 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2575 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2576 } 2577 } 2578 if (!valid_qr) { 2579 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2580 for (jj=0;jj<size_of_constraint;jj++) { 2581 for (ii=0;ii<primal_dofs;ii++) { 2582 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2583 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2584 } 2585 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2586 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2587 } 2588 } 2589 } 2590 } else { 2591 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2592 } 2593 } 2594 } else { /* simple transformation block */ 2595 PetscInt row,col; 2596 PetscScalar val,norm; 2597 2598 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2599 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2600 for (j=0;j<size_of_constraint;j++) { 2601 PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2602 row = temp_indices_to_constraint[temp_indices[total_counts]+j]; 2603 if (!PetscBTLookup(is_primal,row_B)) { 2604 col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2605 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2606 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2607 } else { 2608 for (k=0;k<size_of_constraint;k++) { 2609 col = temp_indices_to_constraint[temp_indices[total_counts]+k]; 2610 if (row != col) { 2611 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2612 } else { 2613 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2614 } 2615 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2616 } 2617 } 2618 } 2619 if (pcbddc->dbg_flag) { 2620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2621 } 2622 } 2623 /* increment primal counter */ 2624 primal_counter += primal_dofs; 2625 } else { 2626 if (pcbddc->dbg_flag) { 2627 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2628 } 2629 } 2630 /* increment constraint counter total_counts */ 2631 total_counts += primal_dofs; 2632 } 2633 2634 /* free workspace */ 2635 if (qr_needed) { 2636 if (pcbddc->dbg_flag) { 2637 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2638 } 2639 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2640 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2641 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2642 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2643 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2644 } 2645 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2646 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2647 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2648 2649 /* assembling of global change of variable */ 2650 { 2651 Mat tmat; 2652 PetscInt bs; 2653 2654 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2655 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2656 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2657 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2658 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2659 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2660 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2661 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2662 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2663 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2664 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2665 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2666 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2667 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2668 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2669 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2670 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2671 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2672 } 2673 /* check */ 2674 if (pcbddc->dbg_flag) { 2675 PetscReal error; 2676 Vec x,x_change; 2677 2678 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2679 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2680 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2681 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2682 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2683 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2684 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2685 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2686 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2687 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2688 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2689 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2690 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2691 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2692 ierr = VecDestroy(&x);CHKERRQ(ierr); 2693 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2694 } 2695 2696 /* adapt sub_schurs computed (if any) */ 2697 if (pcbddc->use_deluxe_scaling) { 2698 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2699 if (sub_schurs->n_subs_par_g) { 2700 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented"); 2701 } 2702 if (sub_schurs->S_Ej_all) { 2703 Mat S_1,S_2,tmat; 2704 IS is_all_N; 2705 2706 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2707 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr); 2708 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2709 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2710 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2711 sub_schurs->S_Ej_all = S_1; 2712 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2713 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2714 sub_schurs->sum_S_Ej_all = S_2; 2715 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2716 } 2717 } 2718 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2719 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2720 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2721 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2722 } 2723 2724 /* set up change of basis context */ 2725 if (pcbddc->ChangeOfBasisMatrix) { 2726 PCBDDCChange_ctx change_ctx; 2727 2728 if (!pcbddc->new_global_mat) { 2729 PetscInt global_size,local_size; 2730 2731 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2732 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2733 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2734 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2735 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2736 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2737 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2738 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2739 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2740 } else { 2741 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2742 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2743 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2744 } 2745 if (!pcbddc->user_ChangeOfBasisMatrix) { 2746 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2747 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2748 } else { 2749 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2750 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2751 } 2752 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2753 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2754 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2755 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2756 } 2757 2758 /* get indices in local ordering for vertices and constraints */ 2759 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2760 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2761 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2762 } 2763 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2764 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2765 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2766 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2767 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2768 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2769 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2770 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2771 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2772 /* set quantities in PCBDDC data struct */ 2773 pcbddc->n_actual_vertices = i; 2774 /* check if a new primal space has been introduced */ 2775 pcbddc->new_primal_space_local = PETSC_TRUE; 2776 if (olocal_primal_size == pcbddc->local_primal_size) { 2777 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2778 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2779 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2780 } 2781 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2782 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2783 2784 /* flush dbg viewer */ 2785 if (pcbddc->dbg_flag) { 2786 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2787 } 2788 2789 /* free workspace */ 2790 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2791 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2792 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2793 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2794 if (!pcbddc->adaptive_selection) { 2795 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2796 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2797 } else { 2798 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2799 pcbddc->adaptive_constraints_ptrs, 2800 pcbddc->adaptive_constraints_idxs, 2801 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2802 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2803 } 2804 PetscFunctionReturn(0); 2805 } 2806 2807 #undef __FUNCT__ 2808 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2809 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2810 { 2811 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2812 PC_IS *pcis = (PC_IS*)pc->data; 2813 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2814 PetscInt ierr,i,vertex_size; 2815 PetscViewer viewer=pcbddc->dbg_viewer; 2816 2817 PetscFunctionBegin; 2818 /* Reset previously computed graph */ 2819 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2820 /* Init local Graph struct */ 2821 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2822 2823 /* Check validity of the csr graph passed in by the user */ 2824 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2825 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2826 } 2827 2828 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2829 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2830 PetscInt *xadj,*adjncy; 2831 PetscInt nvtxs; 2832 2833 if (pcbddc->use_local_adj) { 2834 PetscBool flg_row=PETSC_FALSE; 2835 2836 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2837 if (flg_row) { 2838 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2839 pcbddc->computed_rowadj = PETSC_TRUE; 2840 } 2841 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2842 } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */ 2843 IS is_dummy; 2844 ISLocalToGlobalMapping l2gmap_dummy; 2845 PetscInt j,sum; 2846 PetscInt *cxadj,*cadjncy; 2847 const PetscInt *idxs; 2848 PCBDDCGraph graph; 2849 PetscBT is_on_boundary; 2850 2851 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2852 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2853 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2854 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2855 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2856 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2857 graph->xadj = xadj; 2858 graph->adjncy = adjncy; 2859 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2860 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2861 2862 if (pcbddc->dbg_flag) { 2863 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2864 for (i=0;i<graph->ncc;i++) { 2865 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2866 } 2867 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2868 } 2869 2870 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2871 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2872 for (i=0;i<pcis->n_B;i++) { 2873 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2874 } 2875 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2876 2877 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2878 sum = 0; 2879 for (i=0;i<graph->ncc;i++) { 2880 PetscInt sizecc = 0; 2881 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2882 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2883 sizecc++; 2884 } 2885 } 2886 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2887 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2888 cxadj[graph->queue[j]] = sizecc; 2889 } 2890 } 2891 sum += sizecc*sizecc; 2892 } 2893 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2894 sum = 0; 2895 for (i=0;i<nvtxs;i++) { 2896 PetscInt temp = cxadj[i]; 2897 cxadj[i] = sum; 2898 sum += temp; 2899 } 2900 cxadj[nvtxs] = sum; 2901 for (i=0;i<graph->ncc;i++) { 2902 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2903 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2904 PetscInt k,sizecc = 0; 2905 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2906 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2907 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2908 sizecc++; 2909 } 2910 } 2911 } 2912 } 2913 } 2914 if (nvtxs) { 2915 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2916 } else { 2917 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2918 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2919 } 2920 graph->xadj = 0; 2921 graph->adjncy = 0; 2922 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2923 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2924 } 2925 } 2926 2927 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2928 vertex_size = 1; 2929 if (pcbddc->user_provided_isfordofs) { 2930 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2931 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2932 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2933 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2934 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2935 } 2936 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2937 pcbddc->n_ISForDofs = 0; 2938 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2939 } 2940 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2941 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2942 } else { 2943 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2944 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2945 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2946 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2947 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2948 } 2949 } 2950 } 2951 2952 /* Setup of Graph */ 2953 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2954 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2955 } 2956 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2957 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2958 } 2959 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2960 2961 /* Graph's connected components analysis */ 2962 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2963 2964 /* print some info to stdout */ 2965 if (pcbddc->dbg_flag) { 2966 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2967 } 2968 2969 /* mark topography has done */ 2970 pcbddc->recompute_topography = PETSC_FALSE; 2971 PetscFunctionReturn(0); 2972 } 2973 2974 #undef __FUNCT__ 2975 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2976 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2977 { 2978 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2979 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2980 PetscErrorCode ierr; 2981 2982 PetscFunctionBegin; 2983 n = 0; 2984 vertices = 0; 2985 if (pcbddc->ConstraintMatrix) { 2986 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2987 for (i=0;i<local_primal_size;i++) { 2988 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2989 if (size_of_constraint == 1) n++; 2990 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2991 } 2992 if (vertices_idx) { 2993 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2994 n = 0; 2995 for (i=0;i<local_primal_size;i++) { 2996 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2997 if (size_of_constraint == 1) { 2998 vertices[n++]=row_cmat_indices[0]; 2999 } 3000 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3001 } 3002 } 3003 } 3004 *n_vertices = n; 3005 if (vertices_idx) *vertices_idx = vertices; 3006 PetscFunctionReturn(0); 3007 } 3008 3009 #undef __FUNCT__ 3010 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 3011 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 3012 { 3013 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3014 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 3015 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 3016 PetscBT touched; 3017 PetscErrorCode ierr; 3018 3019 /* This function assumes that the number of local constraints per connected component 3020 is not greater than the number of nodes defined for the connected component 3021 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3022 PetscFunctionBegin; 3023 n = 0; 3024 constraints_index = 0; 3025 if (pcbddc->ConstraintMatrix) { 3026 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3027 max_size_of_constraint = 0; 3028 for (i=0;i<local_primal_size;i++) { 3029 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3030 if (size_of_constraint > 1) { 3031 n++; 3032 } 3033 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3034 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3035 } 3036 if (constraints_idx) { 3037 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3038 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3039 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3040 n = 0; 3041 for (i=0;i<local_primal_size;i++) { 3042 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3043 if (size_of_constraint > 1) { 3044 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3045 /* find first untouched local node */ 3046 j = 0; 3047 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3048 min_index = row_cmat_global_indices[j]; 3049 min_loc = j; 3050 /* search the minimum among nodes not yet touched on the connected component 3051 since there can be more than one constraint on a single cc */ 3052 for (j=1;j<size_of_constraint;j++) { 3053 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3054 min_index = row_cmat_global_indices[j]; 3055 min_loc = j; 3056 } 3057 } 3058 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3059 constraints_index[n++] = row_cmat_indices[min_loc]; 3060 } 3061 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3062 } 3063 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3064 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3065 } 3066 } 3067 *n_constraints = n; 3068 if (constraints_idx) *constraints_idx = constraints_index; 3069 PetscFunctionReturn(0); 3070 } 3071 3072 #undef __FUNCT__ 3073 #define __FUNCT__ "PCBDDCSubsetNumbering" 3074 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3075 { 3076 Vec local_vec,global_vec; 3077 IS seqis,paris; 3078 VecScatter scatter_ctx; 3079 PetscScalar *array; 3080 PetscInt *temp_global_dofs; 3081 PetscScalar globalsum; 3082 PetscInt i,j,s; 3083 PetscInt nlocals,first_index,old_index,max_local; 3084 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 3085 PetscMPIInt *dof_sizes,*dof_displs; 3086 PetscBool first_found; 3087 PetscErrorCode ierr; 3088 3089 PetscFunctionBegin; 3090 /* mpi buffers */ 3091 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3092 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3093 j = ( !rank_prec_comm ? size_prec_comm : 0); 3094 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 3095 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 3096 /* get maximum size of subset */ 3097 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3098 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3099 max_local = 0; 3100 for (i=0;i<n_local_dofs;i++) { 3101 if (max_local < temp_global_dofs[i] ) { 3102 max_local = temp_global_dofs[i]; 3103 } 3104 } 3105 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3106 max_global++; 3107 max_local = 0; 3108 for (i=0;i<n_local_dofs;i++) { 3109 if (max_local < local_dofs[i] ) { 3110 max_local = local_dofs[i]; 3111 } 3112 } 3113 max_local++; 3114 /* allocate workspace */ 3115 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3116 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3117 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3118 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3119 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3120 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3121 /* create scatter */ 3122 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3123 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3124 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3125 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3126 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3127 /* init array */ 3128 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3129 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3130 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3131 if (local_dofs_mult) { 3132 for (i=0;i<n_local_dofs;i++) { 3133 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3134 } 3135 } else { 3136 for (i=0;i<n_local_dofs;i++) { 3137 array[local_dofs[i]]=1.0; 3138 } 3139 } 3140 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3141 /* scatter into global vec and get total number of global dofs */ 3142 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3143 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3144 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3145 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3146 /* Fill global_vec with cumulative function for global numbering */ 3147 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3148 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3149 nlocals = 0; 3150 first_index = -1; 3151 first_found = PETSC_FALSE; 3152 for (i=0;i<s;i++) { 3153 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3154 first_found = PETSC_TRUE; 3155 first_index = i; 3156 } 3157 nlocals += (PetscInt)PetscRealPart(array[i]); 3158 } 3159 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3160 if (!rank_prec_comm) { 3161 dof_displs[0]=0; 3162 for (i=1;i<size_prec_comm;i++) { 3163 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3164 } 3165 } 3166 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3167 if (first_found) { 3168 array[first_index] += (PetscScalar)nlocals; 3169 old_index = first_index; 3170 for (i=first_index+1;i<s;i++) { 3171 if (PetscRealPart(array[i]) > 0.1) { 3172 array[i] += array[old_index]; 3173 old_index = i; 3174 } 3175 } 3176 } 3177 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3178 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3179 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3180 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3181 /* get global ordering of local dofs */ 3182 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3183 if (local_dofs_mult) { 3184 for (i=0;i<n_local_dofs;i++) { 3185 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3186 } 3187 } else { 3188 for (i=0;i<n_local_dofs;i++) { 3189 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3190 } 3191 } 3192 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3193 /* free workspace */ 3194 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3195 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3196 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3197 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3198 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3199 /* return pointer to global ordering of local dofs */ 3200 *global_numbering_subset = temp_global_dofs; 3201 PetscFunctionReturn(0); 3202 } 3203 3204 #undef __FUNCT__ 3205 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3206 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3207 { 3208 PetscInt i,j; 3209 PetscScalar *alphas; 3210 PetscErrorCode ierr; 3211 3212 PetscFunctionBegin; 3213 /* this implements stabilized Gram-Schmidt */ 3214 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3215 for (i=0;i<n;i++) { 3216 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3217 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3218 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3219 } 3220 ierr = PetscFree(alphas);CHKERRQ(ierr); 3221 PetscFunctionReturn(0); 3222 } 3223 3224 #undef __FUNCT__ 3225 #define __FUNCT__ "MatISGetSubassemblingPattern" 3226 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3227 { 3228 Mat subdomain_adj; 3229 IS new_ranks,ranks_send_to; 3230 MatPartitioning partitioner; 3231 Mat_IS *matis; 3232 PetscInt n_neighs,*neighs,*n_shared,**shared; 3233 PetscInt prank; 3234 PetscMPIInt size,rank,color; 3235 PetscInt *xadj,*adjncy,*oldranks; 3236 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3237 PetscInt i,local_size,threshold=0; 3238 PetscErrorCode ierr; 3239 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3240 PetscSubcomm subcomm; 3241 3242 PetscFunctionBegin; 3243 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3244 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3245 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3246 3247 /* Get info on mapping */ 3248 matis = (Mat_IS*)(mat->data); 3249 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3250 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3251 3252 /* build local CSR graph of subdomains' connectivity */ 3253 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3254 xadj[0] = 0; 3255 xadj[1] = PetscMax(n_neighs-1,0); 3256 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3257 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3258 3259 if (threshold) { 3260 PetscInt xadj_count = 0; 3261 for (i=1;i<n_neighs;i++) { 3262 if (n_shared[i] > threshold) { 3263 adjncy[xadj_count] = neighs[i]; 3264 adjncy_wgt[xadj_count] = n_shared[i]; 3265 xadj_count++; 3266 } 3267 } 3268 xadj[1] = xadj_count; 3269 } else { 3270 if (xadj[1]) { 3271 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3272 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3273 } 3274 } 3275 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3276 if (use_square) { 3277 for (i=0;i<xadj[1];i++) { 3278 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3279 } 3280 } 3281 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3282 3283 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3284 3285 /* 3286 Restrict work on active processes only. 3287 */ 3288 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3289 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3290 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3291 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3292 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3293 if (color) { 3294 ierr = PetscFree(xadj);CHKERRQ(ierr); 3295 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3296 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3297 } else { 3298 PetscInt coarsening_ratio; 3299 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 3300 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3301 prank = rank; 3302 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 3303 /* 3304 for (i=0;i<size;i++) { 3305 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3306 } 3307 */ 3308 for (i=0;i<xadj[1];i++) { 3309 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3310 } 3311 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3312 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3313 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3314 3315 /* Partition */ 3316 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 3317 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3318 if (use_vwgt) { 3319 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3320 v_wgt[0] = local_size; 3321 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3322 } 3323 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3324 coarsening_ratio = size/n_subdomains; 3325 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3326 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3327 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3328 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3329 3330 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3331 if (contiguous) { 3332 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3333 } else { 3334 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3335 } 3336 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3337 /* clean up */ 3338 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3339 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3340 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3341 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3342 } 3343 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3344 3345 /* assemble parallel IS for sends */ 3346 i = 1; 3347 if (color) i=0; 3348 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3349 3350 /* get back IS */ 3351 *is_sends = ranks_send_to; 3352 PetscFunctionReturn(0); 3353 } 3354 3355 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3356 3357 #undef __FUNCT__ 3358 #define __FUNCT__ "MatISSubassemble" 3359 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3360 { 3361 Mat local_mat; 3362 Mat_IS *matis; 3363 IS is_sends_internal; 3364 PetscInt rows,cols,new_local_rows; 3365 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3366 PetscBool ismatis,isdense,newisdense,destroy_mat; 3367 ISLocalToGlobalMapping l2gmap; 3368 PetscInt* l2gmap_indices; 3369 const PetscInt* is_indices; 3370 MatType new_local_type; 3371 /* buffers */ 3372 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3373 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3374 PetscInt *recv_buffer_idxs_local; 3375 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3376 /* MPI */ 3377 MPI_Comm comm,comm_n; 3378 PetscSubcomm subcomm; 3379 PetscMPIInt n_sends,n_recvs,commsize; 3380 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3381 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3382 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3383 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3384 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3385 PetscErrorCode ierr; 3386 3387 PetscFunctionBegin; 3388 /* TODO: add missing checks */ 3389 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3390 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3391 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3392 PetscValidLogicalCollectiveInt(mat,nis,7); 3393 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3394 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3395 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3396 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3397 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3398 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3399 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3400 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3401 PetscInt mrows,mcols,mnrows,mncols; 3402 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3403 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3404 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3405 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3406 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3407 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3408 } 3409 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3410 PetscValidLogicalCollectiveInt(mat,bs,0); 3411 /* prepare IS for sending if not provided */ 3412 if (!is_sends) { 3413 PetscBool pcontig = PETSC_TRUE; 3414 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3415 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3416 } else { 3417 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3418 is_sends_internal = is_sends; 3419 } 3420 3421 /* get pointer of MATIS data */ 3422 matis = (Mat_IS*)mat->data; 3423 3424 /* get comm */ 3425 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3426 3427 /* compute number of sends */ 3428 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3429 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3430 3431 /* compute number of receives */ 3432 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3433 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3434 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3435 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3436 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3437 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3438 ierr = PetscFree(iflags);CHKERRQ(ierr); 3439 3440 /* restrict comm if requested */ 3441 subcomm = 0; 3442 destroy_mat = PETSC_FALSE; 3443 if (restrict_comm) { 3444 PetscMPIInt color,subcommsize; 3445 3446 color = 0; 3447 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3448 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3449 subcommsize = commsize - subcommsize; 3450 /* check if reuse has been requested */ 3451 if (reuse == MAT_REUSE_MATRIX) { 3452 if (*mat_n) { 3453 PetscMPIInt subcommsize2; 3454 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3455 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3456 comm_n = PetscObjectComm((PetscObject)*mat_n); 3457 } else { 3458 comm_n = PETSC_COMM_SELF; 3459 } 3460 } else { /* MAT_INITIAL_MATRIX */ 3461 PetscMPIInt rank; 3462 3463 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3464 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3465 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3466 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3467 comm_n = subcomm->comm; 3468 } 3469 /* flag to destroy *mat_n if not significative */ 3470 if (color) destroy_mat = PETSC_TRUE; 3471 } else { 3472 comm_n = comm; 3473 } 3474 3475 /* prepare send/receive buffers */ 3476 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3477 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3478 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3479 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3480 if (nis) { 3481 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3482 } 3483 3484 /* Get data from local matrices */ 3485 if (!isdense) { 3486 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3487 /* TODO: See below some guidelines on how to prepare the local buffers */ 3488 /* 3489 send_buffer_vals should contain the raw values of the local matrix 3490 send_buffer_idxs should contain: 3491 - MatType_PRIVATE type 3492 - PetscInt size_of_l2gmap 3493 - PetscInt global_row_indices[size_of_l2gmap] 3494 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3495 */ 3496 } else { 3497 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3498 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3499 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3500 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3501 send_buffer_idxs[1] = i; 3502 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3503 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3504 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3505 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3506 for (i=0;i<n_sends;i++) { 3507 ilengths_vals[is_indices[i]] = len*len; 3508 ilengths_idxs[is_indices[i]] = len+2; 3509 } 3510 } 3511 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3512 /* additional is (if any) */ 3513 if (nis) { 3514 PetscMPIInt psum; 3515 PetscInt j; 3516 for (j=0,psum=0;j<nis;j++) { 3517 PetscInt plen; 3518 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3519 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3520 psum += len+1; /* indices + lenght */ 3521 } 3522 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3523 for (j=0,psum=0;j<nis;j++) { 3524 PetscInt plen; 3525 const PetscInt *is_array_idxs; 3526 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3527 send_buffer_idxs_is[psum] = plen; 3528 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3529 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3530 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3531 psum += plen+1; /* indices + lenght */ 3532 } 3533 for (i=0;i<n_sends;i++) { 3534 ilengths_idxs_is[is_indices[i]] = psum; 3535 } 3536 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3537 } 3538 3539 buf_size_idxs = 0; 3540 buf_size_vals = 0; 3541 buf_size_idxs_is = 0; 3542 for (i=0;i<n_recvs;i++) { 3543 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3544 buf_size_vals += (PetscInt)olengths_vals[i]; 3545 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3546 } 3547 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3548 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3549 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3550 3551 /* get new tags for clean communications */ 3552 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3553 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3554 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3555 3556 /* allocate for requests */ 3557 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3558 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3559 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3560 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3561 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3562 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3563 3564 /* communications */ 3565 ptr_idxs = recv_buffer_idxs; 3566 ptr_vals = recv_buffer_vals; 3567 ptr_idxs_is = recv_buffer_idxs_is; 3568 for (i=0;i<n_recvs;i++) { 3569 source_dest = onodes[i]; 3570 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3571 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3572 ptr_idxs += olengths_idxs[i]; 3573 ptr_vals += olengths_vals[i]; 3574 if (nis) { 3575 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3576 ptr_idxs_is += olengths_idxs_is[i]; 3577 } 3578 } 3579 for (i=0;i<n_sends;i++) { 3580 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3581 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3582 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3583 if (nis) { 3584 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3585 } 3586 } 3587 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3588 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3589 3590 /* assemble new l2g map */ 3591 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3592 ptr_idxs = recv_buffer_idxs; 3593 new_local_rows = 0; 3594 for (i=0;i<n_recvs;i++) { 3595 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3596 ptr_idxs += olengths_idxs[i]; 3597 } 3598 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3599 ptr_idxs = recv_buffer_idxs; 3600 new_local_rows = 0; 3601 for (i=0;i<n_recvs;i++) { 3602 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3603 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3604 ptr_idxs += olengths_idxs[i]; 3605 } 3606 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3607 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3608 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3609 3610 /* infer new local matrix type from received local matrices type */ 3611 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3612 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3613 if (n_recvs) { 3614 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3615 ptr_idxs = recv_buffer_idxs; 3616 for (i=0;i<n_recvs;i++) { 3617 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3618 new_local_type_private = MATAIJ_PRIVATE; 3619 break; 3620 } 3621 ptr_idxs += olengths_idxs[i]; 3622 } 3623 switch (new_local_type_private) { 3624 case MATDENSE_PRIVATE: 3625 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3626 new_local_type = MATSEQAIJ; 3627 bs = 1; 3628 } else { /* if I receive only 1 dense matrix */ 3629 new_local_type = MATSEQDENSE; 3630 bs = 1; 3631 } 3632 break; 3633 case MATAIJ_PRIVATE: 3634 new_local_type = MATSEQAIJ; 3635 bs = 1; 3636 break; 3637 case MATBAIJ_PRIVATE: 3638 new_local_type = MATSEQBAIJ; 3639 break; 3640 case MATSBAIJ_PRIVATE: 3641 new_local_type = MATSEQSBAIJ; 3642 break; 3643 default: 3644 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3645 break; 3646 } 3647 } else { /* by default, new_local_type is seqdense */ 3648 new_local_type = MATSEQDENSE; 3649 bs = 1; 3650 } 3651 3652 /* create MATIS object if needed */ 3653 if (reuse == MAT_INITIAL_MATRIX) { 3654 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3655 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3656 } else { 3657 /* it also destroys the local matrices */ 3658 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3659 } 3660 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3661 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3662 3663 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3664 3665 /* Global to local map of received indices */ 3666 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3667 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3668 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3669 3670 /* restore attributes -> type of incoming data and its size */ 3671 buf_size_idxs = 0; 3672 for (i=0;i<n_recvs;i++) { 3673 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3674 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3675 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3676 } 3677 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3678 3679 /* set preallocation */ 3680 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3681 if (!newisdense) { 3682 PetscInt *new_local_nnz=0; 3683 3684 ptr_vals = recv_buffer_vals; 3685 ptr_idxs = recv_buffer_idxs_local; 3686 if (n_recvs) { 3687 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3688 } 3689 for (i=0;i<n_recvs;i++) { 3690 PetscInt j; 3691 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3692 for (j=0;j<*(ptr_idxs+1);j++) { 3693 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3694 } 3695 } else { 3696 /* TODO */ 3697 } 3698 ptr_idxs += olengths_idxs[i]; 3699 } 3700 if (new_local_nnz) { 3701 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3702 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3703 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3704 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3705 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3706 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3707 } else { 3708 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3709 } 3710 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3711 } else { 3712 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3713 } 3714 3715 /* set values */ 3716 ptr_vals = recv_buffer_vals; 3717 ptr_idxs = recv_buffer_idxs_local; 3718 for (i=0;i<n_recvs;i++) { 3719 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3720 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3721 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3722 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3723 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3724 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3725 } else { 3726 /* TODO */ 3727 } 3728 ptr_idxs += olengths_idxs[i]; 3729 ptr_vals += olengths_vals[i]; 3730 } 3731 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3732 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3733 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3734 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3735 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3736 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3737 3738 #if 0 3739 if (!restrict_comm) { /* check */ 3740 Vec lvec,rvec; 3741 PetscReal infty_error; 3742 3743 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3744 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3745 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3746 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3747 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3748 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3749 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3750 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3751 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3752 } 3753 #endif 3754 3755 /* assemble new additional is (if any) */ 3756 if (nis) { 3757 PetscInt **temp_idxs,*count_is,j,psum; 3758 3759 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3760 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3761 ptr_idxs = recv_buffer_idxs_is; 3762 psum = 0; 3763 for (i=0;i<n_recvs;i++) { 3764 for (j=0;j<nis;j++) { 3765 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3766 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3767 psum += plen; 3768 ptr_idxs += plen+1; /* shift pointer to received data */ 3769 } 3770 } 3771 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3772 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3773 for (i=1;i<nis;i++) { 3774 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3775 } 3776 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3777 ptr_idxs = recv_buffer_idxs_is; 3778 for (i=0;i<n_recvs;i++) { 3779 for (j=0;j<nis;j++) { 3780 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3781 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3782 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3783 ptr_idxs += plen+1; /* shift pointer to received data */ 3784 } 3785 } 3786 for (i=0;i<nis;i++) { 3787 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3788 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3789 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3790 } 3791 ierr = PetscFree(count_is);CHKERRQ(ierr); 3792 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3793 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3794 } 3795 /* free workspace */ 3796 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3797 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3798 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3799 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3800 if (isdense) { 3801 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3802 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3803 } else { 3804 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3805 } 3806 if (nis) { 3807 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3808 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3809 } 3810 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3811 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3812 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3813 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3814 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3815 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3816 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3817 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3818 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3819 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3820 ierr = PetscFree(onodes);CHKERRQ(ierr); 3821 if (nis) { 3822 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3823 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3824 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3825 } 3826 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3827 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3828 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3829 for (i=0;i<nis;i++) { 3830 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3831 } 3832 } 3833 PetscFunctionReturn(0); 3834 } 3835 3836 /* temporary hack into ksp private data structure */ 3837 #include <petsc-private/kspimpl.h> 3838 3839 #undef __FUNCT__ 3840 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3841 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3842 { 3843 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3844 PC_IS *pcis = (PC_IS*)pc->data; 3845 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3846 MatNullSpace CoarseNullSpace=NULL; 3847 ISLocalToGlobalMapping coarse_islg; 3848 IS coarse_is,*isarray; 3849 PetscInt i,im_active=-1,active_procs=-1; 3850 PetscInt nis,nisdofs,nisneu; 3851 PC pc_temp; 3852 PCType coarse_pc_type; 3853 KSPType coarse_ksp_type; 3854 PetscBool multilevel_requested,multilevel_allowed; 3855 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3856 Mat t_coarse_mat_is; 3857 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3858 PetscMPIInt all_procs; 3859 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3860 PetscBool compute_vecs = PETSC_FALSE; 3861 PetscScalar *array; 3862 PetscErrorCode ierr; 3863 3864 PetscFunctionBegin; 3865 /* Assign global numbering to coarse dofs */ 3866 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3867 compute_vecs = PETSC_TRUE; 3868 PetscInt ocoarse_size; 3869 ocoarse_size = pcbddc->coarse_size; 3870 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3871 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3872 /* see if we can avoid some work */ 3873 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3874 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3875 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3876 coarse_reuse = PETSC_FALSE; 3877 } else { /* we can safely reuse already computed coarse matrix */ 3878 coarse_reuse = PETSC_TRUE; 3879 } 3880 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3881 coarse_reuse = PETSC_FALSE; 3882 } 3883 /* reset any subassembling information */ 3884 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3885 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3886 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3887 coarse_reuse = PETSC_TRUE; 3888 } 3889 3890 /* count "active" (i.e. with positive local size) and "void" processes */ 3891 im_active = !!(pcis->n); 3892 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3893 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3894 void_procs = all_procs-active_procs; 3895 csin_type_simple = PETSC_TRUE; 3896 redist = PETSC_FALSE; 3897 if (pcbddc->current_level && void_procs) { 3898 csin_ml = PETSC_TRUE; 3899 ncoarse_ml = void_procs; 3900 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3901 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3902 csin_ds = PETSC_TRUE; 3903 ncoarse_ds = pcbddc->redistribute_coarse; 3904 redist = PETSC_TRUE; 3905 } else { 3906 csin_ds = PETSC_TRUE; 3907 ncoarse_ds = active_procs; 3908 redist = PETSC_TRUE; 3909 } 3910 } else { 3911 csin_ml = PETSC_FALSE; 3912 ncoarse_ml = all_procs; 3913 if (void_procs) { 3914 csin_ds = PETSC_TRUE; 3915 ncoarse_ds = void_procs; 3916 csin_type_simple = PETSC_FALSE; 3917 } else { 3918 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3919 csin_ds = PETSC_TRUE; 3920 ncoarse_ds = pcbddc->redistribute_coarse; 3921 redist = PETSC_TRUE; 3922 } else { 3923 csin_ds = PETSC_FALSE; 3924 ncoarse_ds = all_procs; 3925 } 3926 } 3927 } 3928 3929 /* 3930 test if we can go multilevel: three conditions must be satisfied: 3931 - we have not exceeded the number of levels requested 3932 - we can actually subassemble the active processes 3933 - we can find a suitable number of MPI processes where we can place the subassembled problem 3934 */ 3935 multilevel_allowed = PETSC_FALSE; 3936 multilevel_requested = PETSC_FALSE; 3937 if (pcbddc->current_level < pcbddc->max_levels) { 3938 multilevel_requested = PETSC_TRUE; 3939 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3940 multilevel_allowed = PETSC_FALSE; 3941 } else { 3942 multilevel_allowed = PETSC_TRUE; 3943 } 3944 } 3945 /* determine number of process partecipating to coarse solver */ 3946 if (multilevel_allowed) { 3947 ncoarse = ncoarse_ml; 3948 csin = csin_ml; 3949 redist = PETSC_FALSE; 3950 } else { 3951 ncoarse = ncoarse_ds; 3952 csin = csin_ds; 3953 } 3954 3955 /* creates temporary l2gmap and IS for coarse indexes */ 3956 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3957 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3958 3959 /* creates temporary MATIS object for coarse matrix */ 3960 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3961 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3962 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3963 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3964 #if 0 3965 { 3966 PetscViewer viewer; 3967 char filename[256]; 3968 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3969 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3970 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3971 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3972 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3973 } 3974 #endif 3975 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3976 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3977 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3978 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3979 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3980 3981 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3982 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3983 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3984 const PetscInt *idxs; 3985 ISLocalToGlobalMapping tmap; 3986 3987 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3988 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3989 /* allocate space for temporary storage */ 3990 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3991 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3992 /* allocate for IS array */ 3993 nisdofs = pcbddc->n_ISForDofsLocal; 3994 nisneu = !!pcbddc->NeumannBoundariesLocal; 3995 nis = nisdofs + nisneu; 3996 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3997 /* dofs splitting */ 3998 for (i=0;i<nisdofs;i++) { 3999 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4000 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4001 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4002 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4003 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4004 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4005 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4006 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4007 } 4008 /* neumann boundaries */ 4009 if (pcbddc->NeumannBoundariesLocal) { 4010 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4011 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4012 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4013 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4014 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4015 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4016 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4017 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4018 } 4019 /* free memory */ 4020 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4021 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4022 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4023 } else { 4024 nis = 0; 4025 nisdofs = 0; 4026 nisneu = 0; 4027 isarray = NULL; 4028 } 4029 /* destroy no longer needed map */ 4030 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4031 4032 /* restrict on coarse candidates (if needed) */ 4033 coarse_mat_is = NULL; 4034 if (csin) { 4035 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4036 if (redist) { 4037 PetscMPIInt rank; 4038 PetscInt spc,n_spc_p1,dest[1],destsize; 4039 4040 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4041 spc = active_procs/ncoarse; 4042 n_spc_p1 = active_procs%ncoarse; 4043 if (im_active) { 4044 destsize = 1; 4045 if (rank > n_spc_p1*(spc+1)-1) { 4046 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4047 } else { 4048 dest[0] = rank/(spc+1); 4049 } 4050 } else { 4051 destsize = 0; 4052 } 4053 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4054 } else if (csin_type_simple) { 4055 PetscMPIInt rank; 4056 PetscInt issize,isidx; 4057 4058 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4059 if (im_active) { 4060 issize = 1; 4061 isidx = (PetscInt)rank; 4062 } else { 4063 issize = 0; 4064 isidx = -1; 4065 } 4066 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4067 } else { /* get a suitable subassembling pattern from MATIS code */ 4068 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4069 } 4070 4071 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4072 if (!redist || ncoarse <= void_procs) { 4073 PetscInt ncoarse_cand,tissize,*nisindices; 4074 PetscInt *coarse_candidates; 4075 const PetscInt* tisindices; 4076 4077 /* get coarse candidates' ranks in pc communicator */ 4078 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4079 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4080 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4081 if (!coarse_candidates[i]) { 4082 coarse_candidates[ncoarse_cand++]=i; 4083 } 4084 } 4085 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4086 4087 4088 if (pcbddc->dbg_flag) { 4089 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4090 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4091 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4092 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4093 for (i=0;i<ncoarse_cand;i++) { 4094 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4095 } 4096 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4097 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4098 } 4099 /* shift the pattern on coarse candidates */ 4100 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4101 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4102 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4103 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4104 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4105 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4106 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4107 } 4108 if (pcbddc->dbg_flag) { 4109 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4110 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4111 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4112 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4113 } 4114 } 4115 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4116 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4117 } else { 4118 if (pcbddc->dbg_flag) { 4119 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4120 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4121 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4122 } 4123 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4124 coarse_mat_is = t_coarse_mat_is; 4125 } 4126 4127 /* create local to global scatters for coarse problem */ 4128 if (compute_vecs) { 4129 PetscInt lrows; 4130 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4131 if (coarse_mat_is) { 4132 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4133 } else { 4134 lrows = 0; 4135 } 4136 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4137 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4138 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4139 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4140 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4141 } 4142 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4143 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4144 4145 /* set defaults for coarse KSP and PC */ 4146 if (multilevel_allowed) { 4147 coarse_ksp_type = KSPRICHARDSON; 4148 coarse_pc_type = PCBDDC; 4149 } else { 4150 coarse_ksp_type = KSPPREONLY; 4151 coarse_pc_type = PCREDUNDANT; 4152 } 4153 4154 /* print some info if requested */ 4155 if (pcbddc->dbg_flag) { 4156 if (!multilevel_allowed) { 4157 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4158 if (multilevel_requested) { 4159 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4160 } else if (pcbddc->max_levels) { 4161 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4162 } 4163 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4164 } 4165 } 4166 4167 /* create the coarse KSP object only once with defaults */ 4168 if (coarse_mat_is) { 4169 MatReuse coarse_mat_reuse; 4170 PetscViewer dbg_viewer = NULL; 4171 if (pcbddc->dbg_flag) { 4172 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4173 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4174 } 4175 if (!pcbddc->coarse_ksp) { 4176 char prefix[256],str_level[16]; 4177 size_t len; 4178 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4179 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4180 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4181 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4182 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4183 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4184 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4185 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4186 /* prefix */ 4187 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4188 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4189 if (!pcbddc->current_level) { 4190 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4191 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4192 } else { 4193 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4194 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4195 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4196 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4197 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4198 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4199 } 4200 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4201 /* allow user customization */ 4202 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4203 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4204 } 4205 4206 /* get some info after set from options */ 4207 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4208 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4209 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4210 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4211 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4212 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4213 isbddc = PETSC_FALSE; 4214 } 4215 if (isredundant) { 4216 KSP inner_ksp; 4217 PC inner_pc; 4218 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4219 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4220 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4221 } 4222 4223 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4224 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4225 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4226 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4227 if (nisdofs) { 4228 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4229 for (i=0;i<nisdofs;i++) { 4230 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4231 } 4232 } 4233 if (nisneu) { 4234 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4235 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4236 } 4237 4238 /* assemble coarse matrix */ 4239 if (coarse_reuse) { 4240 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4241 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4242 coarse_mat_reuse = MAT_REUSE_MATRIX; 4243 } else { 4244 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4245 } 4246 if (isbddc || isnn) { 4247 if (pcbddc->coarsening_ratio > 1) { 4248 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4249 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4250 if (pcbddc->dbg_flag) { 4251 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4252 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4253 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4254 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4255 } 4256 } 4257 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4258 } else { 4259 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4260 coarse_mat = coarse_mat_is; 4261 } 4262 } else { 4263 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4264 } 4265 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4266 4267 /* propagate symmetry info to coarse matrix */ 4268 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4269 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4270 4271 /* set operators */ 4272 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4273 if (pcbddc->dbg_flag) { 4274 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4275 } 4276 } else { /* processes non partecipating to coarse solver (if any) */ 4277 coarse_mat = 0; 4278 } 4279 ierr = PetscFree(isarray);CHKERRQ(ierr); 4280 #if 0 4281 { 4282 PetscViewer viewer; 4283 char filename[256]; 4284 sprintf(filename,"coarse_mat.m"); 4285 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4286 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4287 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4288 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4289 } 4290 #endif 4291 4292 /* Compute coarse null space (special handling by BDDC only) */ 4293 if (pcbddc->NullSpace) { 4294 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4295 } 4296 4297 if (pcbddc->coarse_ksp) { 4298 Vec crhs,csol; 4299 PetscBool ispreonly; 4300 if (CoarseNullSpace) { 4301 if (isbddc) { 4302 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4303 } else { 4304 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4305 } 4306 } 4307 /* setup coarse ksp */ 4308 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4309 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4310 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4311 /* hack */ 4312 if (!csol) { 4313 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4314 } 4315 if (!crhs) { 4316 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4317 } 4318 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4319 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4320 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4321 KSP check_ksp; 4322 KSPType check_ksp_type; 4323 PC check_pc; 4324 Vec check_vec,coarse_vec; 4325 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4326 PetscInt its; 4327 PetscBool compute_eigs; 4328 PetscReal *eigs_r,*eigs_c; 4329 PetscInt neigs; 4330 const char *prefix; 4331 4332 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4333 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4334 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4335 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4336 if (ispreonly) { 4337 check_ksp_type = KSPPREONLY; 4338 compute_eigs = PETSC_FALSE; 4339 } else { 4340 check_ksp_type = KSPGMRES; 4341 compute_eigs = PETSC_TRUE; 4342 } 4343 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4344 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4345 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4346 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4347 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4348 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4349 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4350 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4351 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4352 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4353 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4354 /* create random vec */ 4355 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4356 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4357 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4358 if (CoarseNullSpace) { 4359 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4360 } 4361 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4362 /* solve coarse problem */ 4363 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4364 if (CoarseNullSpace) { 4365 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4366 } 4367 /* set eigenvalue estimation if preonly has not been requested */ 4368 if (compute_eigs) { 4369 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4370 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4371 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4372 lambda_max = eigs_r[neigs-1]; 4373 lambda_min = eigs_r[0]; 4374 if (pcbddc->use_coarse_estimates) { 4375 if (lambda_max>lambda_min) { 4376 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4377 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4378 } 4379 } 4380 } 4381 4382 /* check coarse problem residual error */ 4383 if (pcbddc->dbg_flag) { 4384 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4385 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4386 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4387 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4388 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4389 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4390 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4391 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4392 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4393 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4394 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4395 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4396 if (compute_eigs) { 4397 PetscReal lambda_max_s,lambda_min_s; 4398 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4399 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4400 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4401 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4402 for (i=0;i<neigs;i++) { 4403 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4404 } 4405 } 4406 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4407 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4408 } 4409 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4410 if (compute_eigs) { 4411 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4412 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4413 } 4414 } 4415 } 4416 /* print additional info */ 4417 if (pcbddc->dbg_flag) { 4418 /* waits until all processes reaches this point */ 4419 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4420 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4421 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4422 } 4423 4424 /* free memory */ 4425 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4426 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4427 PetscFunctionReturn(0); 4428 } 4429 4430 #undef __FUNCT__ 4431 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4432 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4433 { 4434 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4435 PC_IS* pcis = (PC_IS*)pc->data; 4436 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4437 PetscInt i,coarse_size; 4438 PetscInt *local_primal_indices; 4439 PetscErrorCode ierr; 4440 4441 PetscFunctionBegin; 4442 /* Compute global number of coarse dofs */ 4443 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4444 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4445 } 4446 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4447 4448 /* check numbering */ 4449 if (pcbddc->dbg_flag) { 4450 PetscScalar coarsesum,*array; 4451 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4452 4453 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4454 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4455 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4456 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4457 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4458 for (i=0;i<pcbddc->local_primal_size;i++) { 4459 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4460 } 4461 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4462 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4463 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4464 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4465 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4466 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4467 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4468 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4469 for (i=0;i<pcis->n;i++) { 4470 if (array[i] == 1.0) { 4471 set_error = PETSC_TRUE; 4472 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4473 } 4474 } 4475 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4476 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4477 for (i=0;i<pcis->n;i++) { 4478 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4479 } 4480 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4481 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4482 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4483 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4484 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4485 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4486 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4487 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4488 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4489 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4490 for (i=0;i<pcbddc->local_primal_size;i++) { 4491 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4492 } 4493 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4494 } 4495 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4496 if (set_error_reduced) { 4497 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4498 } 4499 } 4500 /* get back data */ 4501 *coarse_size_n = coarse_size; 4502 *local_primal_indices_n = local_primal_indices; 4503 PetscFunctionReturn(0); 4504 } 4505 4506 #undef __FUNCT__ 4507 #define __FUNCT__ "PCBDDCGlobalToLocal" 4508 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4509 { 4510 IS localis_t; 4511 PetscInt i,lsize,*idxs,n; 4512 PetscScalar *vals; 4513 PetscErrorCode ierr; 4514 4515 PetscFunctionBegin; 4516 /* get indices in local ordering exploiting local to global map */ 4517 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4518 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4519 for (i=0;i<lsize;i++) vals[i] = 1.0; 4520 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4521 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4522 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4523 if (idxs) { /* multilevel guard */ 4524 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4525 } 4526 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4527 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4528 ierr = PetscFree(vals);CHKERRQ(ierr); 4529 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4530 /* now compute set in local ordering */ 4531 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4532 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4533 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4534 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4535 for (i=0,lsize=0;i<n;i++) { 4536 if (PetscRealPart(vals[i]) > 0.5) { 4537 lsize++; 4538 } 4539 } 4540 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4541 for (i=0,lsize=0;i<n;i++) { 4542 if (PetscRealPart(vals[i]) > 0.5) { 4543 idxs[lsize++] = i; 4544 } 4545 } 4546 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4547 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4548 *localis = localis_t; 4549 PetscFunctionReturn(0); 4550 } 4551 4552 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4553 #undef __FUNCT__ 4554 #define __FUNCT__ "PCBDDCMatMult_Private" 4555 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4556 { 4557 PCBDDCChange_ctx change_ctx; 4558 PetscErrorCode ierr; 4559 4560 PetscFunctionBegin; 4561 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4562 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4563 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4564 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4565 PetscFunctionReturn(0); 4566 } 4567 4568 #undef __FUNCT__ 4569 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4570 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4571 { 4572 PCBDDCChange_ctx change_ctx; 4573 PetscErrorCode ierr; 4574 4575 PetscFunctionBegin; 4576 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4577 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4578 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4579 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4580 PetscFunctionReturn(0); 4581 } 4582 4583 #undef __FUNCT__ 4584 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4585 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4586 { 4587 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4588 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4589 PetscInt *used_xadj,*used_adjncy; 4590 PetscBool free_used_adj; 4591 PetscErrorCode ierr; 4592 4593 PetscFunctionBegin; 4594 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4595 free_used_adj = PETSC_FALSE; 4596 if (pcbddc->sub_schurs_layers == -1) { 4597 used_xadj = NULL; 4598 used_adjncy = NULL; 4599 } else { 4600 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4601 used_xadj = pcbddc->mat_graph->xadj; 4602 used_adjncy = pcbddc->mat_graph->adjncy; 4603 } else if (pcbddc->computed_rowadj) { 4604 used_xadj = pcbddc->mat_graph->xadj; 4605 used_adjncy = pcbddc->mat_graph->adjncy; 4606 } else { 4607 PetscBool flg_row=PETSC_FALSE; 4608 const PetscInt *xadj,*adjncy; 4609 PetscInt nvtxs; 4610 4611 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4612 if (flg_row) { 4613 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4614 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4615 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4616 free_used_adj = PETSC_TRUE; 4617 } else { 4618 pcbddc->sub_schurs_layers = -1; 4619 used_xadj = NULL; 4620 used_adjncy = NULL; 4621 } 4622 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4623 } 4624 } 4625 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4626 4627 /* free adjacency */ 4628 if (free_used_adj) { 4629 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4630 } 4631 PetscFunctionReturn(0); 4632 } 4633 4634 #undef __FUNCT__ 4635 #define __FUNCT__ "PCBDDCInitSubSchurs" 4636 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4637 { 4638 PC_IS *pcis=(PC_IS*)pc->data; 4639 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4640 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4641 PCBDDCGraph graph; 4642 Mat S_j; 4643 PetscErrorCode ierr; 4644 4645 PetscFunctionBegin; 4646 /* attach interface graph for determining subsets */ 4647 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4648 IS verticesIS; 4649 4650 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4651 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4652 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4653 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4654 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4655 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4656 /* 4657 if (pcbddc->dbg_flag) { 4658 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4659 } 4660 */ 4661 } else { 4662 graph = pcbddc->mat_graph; 4663 } 4664 4665 /* Create Schur complement matrix */ 4666 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4667 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4668 4669 /* sub_schurs init */ 4670 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr); 4671 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4672 /* free graph struct */ 4673 if (pcbddc->sub_schurs_rebuild) { 4674 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4675 } 4676 PetscFunctionReturn(0); 4677 } 4678