1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) { 32 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented"); 33 } 34 35 /* max size of subsets */ 36 mss = 0; 37 for (i=0;i<sub_schurs->n_subs;i++) { 38 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 39 PetscInt subset_size; 40 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 41 mss = PetscMax(mss,subset_size); 42 } 43 } 44 45 /* min/max and threshold */ 46 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 47 nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1; 48 nmax = PetscMax(nmin,nmax); 49 if (pcbddc->adaptive_threshold > 1.0) { 50 thresh = 1.0/pcbddc->adaptive_threshold; 51 } else { 52 thresh = 1.0; 53 } 54 55 /* allocate lapack workspace */ 56 cum = cum2 = 0; 57 maxneigs = 0; 58 for (i=0;i<sub_schurs->n_subs;i++) { 59 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 60 PetscInt n,subset_size; 61 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 62 n = PetscMin(subset_size,nmax); 63 cum += subset_size*n; 64 cum2 += n; 65 maxneigs = PetscMax(maxneigs,n); 66 } 67 } 68 69 if (mss) { 70 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 71 PetscBLASInt B_itype = 1; 72 PetscBLASInt B_N = mss; 73 PetscReal zero = 0.0; 74 PetscReal eps = 0.0; /* dlamch? */ 75 76 B_lwork = -1; 77 S = NULL; 78 St = NULL; 79 eigs = NULL; 80 eigv = NULL; 81 B_iwork = NULL; 82 B_ifail = NULL; 83 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 84 #if defined(PETSC_USE_COMPLEX) 85 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 86 #else 87 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 88 #endif 89 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 90 ierr = PetscFPTrapPop();CHKERRQ(ierr); 91 } else { 92 /* TODO */ 93 } 94 } else { 95 lwork = 0; 96 } 97 98 nv = 0; 99 if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */ 100 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 101 } 102 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 103 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 104 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 105 #if defined(PETSC_USE_COMPLEX) 106 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 107 #endif 108 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 109 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 110 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 111 nv+cum,&pcbddc->adaptive_constraints_idxs, 112 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 113 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 114 115 maxneigs = 0; 116 cum = cum2 = cumarray = 0; 117 if (sub_schurs->is_Ej_com) { 118 const PetscInt *idxs; 119 120 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 121 for (cum=0;cum<nv;cum++) { 122 pcbddc->adaptive_constraints_n[cum] = 1; 123 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 124 pcbddc->adaptive_constraints_ptrs[cum] = cum; 125 pcbddc->adaptive_constraints_data[cum] = 1.0; 126 } 127 cum2 = cum; 128 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 129 } 130 131 if (mss) { /* multilevel */ 132 if (pcbddc->use_deluxe_scaling) { 133 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 134 } else { 135 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 136 } 137 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 138 } 139 140 for (i=0;i<sub_schurs->n_subs;i++) { 141 PetscInt j,subset_size; 142 143 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 144 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 145 const PetscInt *idxs; 146 PetscScalar one = 1.0,scalar_zero = 0.0; 147 PetscReal zero=0.0; 148 PetscBLASInt B_N; 149 150 /* S should be copied since we need it for deluxe scaling */ 151 if (sub_schurs->is_hermitian) { 152 PetscInt j; 153 for (j=0;j<subset_size;j++) { 154 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 155 } 156 for (j=0;j<subset_size;j++) { 157 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 158 } 159 } else { 160 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 161 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 162 } 163 /* is always this the right matrix? */ 164 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 165 166 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 167 /* St = Starray+cumarray; */ 168 169 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 170 PetscBLASInt B_itype = 1; 171 PetscBLASInt B_IL = 1, B_IU; 172 PetscReal eps = -1.0; /* dlamch? */ 173 PetscInt nmin_s; 174 175 /* ask for eigenvalues lower than thresh */ 176 PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs); 177 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 178 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 179 #if defined(PETSC_USE_COMPLEX) 180 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 181 #else 182 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 183 #endif 184 ierr = PetscFPTrapPop();CHKERRQ(ierr); 185 if (B_ierr) { 186 if (B_ierr < 0 ) { 187 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 188 } else if (B_ierr <= B_N) { 189 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 190 } else { 191 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 192 } 193 } 194 195 if (B_neigs > nmax) { 196 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax); 197 B_neigs = nmax; 198 } 199 200 nmin_s = PetscMin(nmin,B_N); 201 if (B_neigs < nmin_s) { 202 PetscBLASInt B_neigs2; 203 204 B_IL = B_neigs + 1; 205 ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr); 206 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU); 207 if (sub_schurs->is_hermitian) { 208 PetscInt j; 209 for (j=0;j<subset_size;j++) { 210 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 211 } 212 for (j=0;j<subset_size;j++) { 213 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 214 } 215 } else { 216 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 217 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 218 } 219 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 220 #if defined(PETSC_USE_COMPLEX) 221 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 222 #else 223 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 224 #endif 225 ierr = PetscFPTrapPop();CHKERRQ(ierr); 226 B_neigs += B_neigs2; 227 } 228 if (B_ierr) { 229 if (B_ierr < 0 ) { 230 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 231 } else if (B_ierr <= B_N) { 232 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 233 } else { 234 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 235 } 236 } 237 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Got %d eigs\n",PetscGlobalRank,B_neigs); 238 for (j=0;j<B_neigs;j++) { 239 if (eigs[j] == 0.0) { 240 PetscPrintf(PETSC_COMM_SELF,"[%d] Inf\n",PetscGlobalRank); 241 } else { 242 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.6e\n",PetscGlobalRank,1.0/eigs[j]); 243 } 244 } 245 } else { 246 /* TODO */ 247 } 248 maxneigs = PetscMax(B_neigs,maxneigs); 249 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 250 251 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 252 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&scalar_zero,Seigv,&B_N)); 253 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 254 #if 0 255 PetscInt ii; 256 for (ii=0;ii<B_neigs;ii++) { 257 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Eigenvector %d/%d (%d)\n",PetscGlobalRank,ii,B_neigs,B_N); 258 for (j=0;j<B_N;j++) { 259 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.4e %1.4e\n",PetscGlobalRank,eigv[ii*B_N+j],Seigv[ii*B_N+j]); 260 } 261 } 262 #endif 263 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 264 for (j=0;j<B_neigs;j++) { 265 #if 0 266 { 267 PetscBLASInt Blas_N,Blas_one = 1.0; 268 PetscScalar norm; 269 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 270 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 271 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 272 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 273 } else { 274 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 275 } 276 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 277 } 278 #endif 279 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 280 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 281 cum2 += subset_size; 282 } 283 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 284 } 285 /* shift for next computation */ 286 cumarray += subset_size*subset_size; 287 } 288 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 289 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 290 291 if (mss) { 292 if (pcbddc->use_deluxe_scaling) { 293 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 294 } else { 295 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 296 } 297 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 298 } 299 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 300 #if defined(PETSC_USE_COMPLEX) 301 ierr = PetscFree(rwork);CHKERRQ(ierr); 302 #endif 303 if (pcbddc->dbg_flag) { 304 PetscInt maxneigs_r; 305 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 306 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 307 } 308 PetscFunctionReturn(0); 309 } 310 311 #undef __FUNCT__ 312 #define __FUNCT__ "PCBDDCSetUpSolvers" 313 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 314 { 315 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 316 PetscScalar *coarse_submat_vals; 317 PetscErrorCode ierr; 318 319 PetscFunctionBegin; 320 /* Setup local scatters R_to_B and (optionally) R_to_D */ 321 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 322 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 323 324 /* Setup local neumann solver ksp_R */ 325 /* PCBDDCSetUpLocalScatters should be called first! */ 326 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 327 328 /* Change global null space passed in by the user if change of basis has been requested */ 329 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 330 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 331 } 332 333 /* 334 Setup local correction and local part of coarse basis. 335 Gives back the dense local part of the coarse matrix in column major ordering 336 */ 337 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 338 339 /* Compute total number of coarse nodes and setup coarse solver */ 340 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 341 342 /* free */ 343 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 347 #undef __FUNCT__ 348 #define __FUNCT__ "PCBDDCResetCustomization" 349 PetscErrorCode PCBDDCResetCustomization(PC pc) 350 { 351 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 352 PetscErrorCode ierr; 353 354 PetscFunctionBegin; 355 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 356 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 357 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 358 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 359 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 360 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 361 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 362 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 363 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 364 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 365 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 366 PetscFunctionReturn(0); 367 } 368 369 #undef __FUNCT__ 370 #define __FUNCT__ "PCBDDCResetTopography" 371 PetscErrorCode PCBDDCResetTopography(PC pc) 372 { 373 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 374 PetscErrorCode ierr; 375 376 PetscFunctionBegin; 377 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 378 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 379 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 380 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 381 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 385 #undef __FUNCT__ 386 #define __FUNCT__ "PCBDDCResetSolvers" 387 PetscErrorCode PCBDDCResetSolvers(PC pc) 388 { 389 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 390 PetscScalar *array; 391 PetscErrorCode ierr; 392 393 PetscFunctionBegin; 394 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 395 if (pcbddc->coarse_phi_B) { 396 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 397 ierr = PetscFree(array);CHKERRQ(ierr); 398 } 399 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 400 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 401 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 402 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 403 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 404 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 405 if (pcbddc->local_auxmat2) { 406 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 407 ierr = PetscFree(array);CHKERRQ(ierr); 408 } 409 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 410 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 411 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 412 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 413 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 414 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 415 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 416 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 417 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 418 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 419 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 420 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 421 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 422 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 423 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 424 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 430 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 431 { 432 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 433 PC_IS *pcis = (PC_IS*)pc->data; 434 VecType impVecType; 435 PetscInt n_constraints,n_R,old_size; 436 PetscErrorCode ierr; 437 438 PetscFunctionBegin; 439 if (!pcbddc->ConstraintMatrix) { 440 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 441 } 442 /* get sizes */ 443 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 444 n_R = pcis->n-pcbddc->n_actual_vertices; 445 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 446 /* local work vectors (try to avoid unneeded work)*/ 447 /* R nodes */ 448 old_size = -1; 449 if (pcbddc->vec1_R) { 450 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 451 } 452 if (n_R != old_size) { 453 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 454 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 455 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 456 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 457 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 458 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 459 } 460 /* local primal dofs */ 461 old_size = -1; 462 if (pcbddc->vec1_P) { 463 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 464 } 465 if (pcbddc->local_primal_size != old_size) { 466 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 467 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 468 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 469 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 470 } 471 /* local explicit constraints */ 472 old_size = -1; 473 if (pcbddc->vec1_C) { 474 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 475 } 476 if (n_constraints && n_constraints != old_size) { 477 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 478 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 479 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 480 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 481 } 482 PetscFunctionReturn(0); 483 } 484 485 #undef __FUNCT__ 486 #define __FUNCT__ "PCBDDCSetUpCorrection" 487 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 488 { 489 PetscErrorCode ierr; 490 /* pointers to pcis and pcbddc */ 491 PC_IS* pcis = (PC_IS*)pc->data; 492 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 493 /* submatrices of local problem */ 494 Mat A_RV,A_VR,A_VV; 495 /* submatrices of local coarse problem */ 496 Mat S_VV,S_CV,S_VC,S_CC; 497 /* working matrices */ 498 Mat C_CR; 499 /* additional working stuff */ 500 PC pc_R; 501 Mat F; 502 PetscBool isLU,isCHOL,isILU; 503 504 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 505 PetscScalar *work; 506 PetscInt *idx_V_B; 507 PetscInt n,n_vertices,n_constraints; 508 PetscInt i,n_R,n_D,n_B; 509 PetscBool unsymmetric_check; 510 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 511 MatType impMatType; 512 /* some shortcuts to scalars */ 513 PetscScalar one=1.0,m_one=-1.0; 514 515 PetscFunctionBegin; 516 /* get number of vertices (corners plus constraints with change of basis) 517 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 518 n_vertices = pcbddc->n_actual_vertices; 519 n_constraints = pcbddc->local_primal_size-n_vertices; 520 /* Set Non-overlapping dimensions */ 521 n_B = pcis->n_B; n_D = pcis->n - n_B; 522 n_R = pcis->n-n_vertices; 523 524 /* Set types for local objects needed by BDDC precondtioner */ 525 impMatType = MATSEQDENSE; 526 527 /* vertices in boundary numbering */ 528 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 529 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 530 if (i != n_vertices) { 531 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 532 } 533 534 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 535 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 536 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 537 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 538 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 539 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 540 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 541 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 542 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 543 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 544 545 unsymmetric_check = PETSC_FALSE; 546 /* allocate workspace */ 547 n = 0; 548 if (n_constraints) { 549 n += n_R*n_constraints; 550 } 551 if (n_vertices) { 552 n = PetscMax(2*n_R*n_vertices,n); 553 } 554 if (!pcbddc->issym) { 555 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 556 unsymmetric_check = PETSC_TRUE; 557 } 558 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 559 560 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 561 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 562 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 563 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 564 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 565 if (isLU || isILU || isCHOL) { 566 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 567 } else { 568 F = NULL; 569 } 570 571 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 572 if (n_constraints) { 573 Mat M1,M2,M3; 574 IS is_aux; 575 /* see if we can save some allocations */ 576 if (pcbddc->local_auxmat2) { 577 PetscInt on_R,on_constraints; 578 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 579 if (on_R != n_R || on_constraints != n_constraints) { 580 PetscScalar *marray; 581 582 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 583 ierr = PetscFree(marray);CHKERRQ(ierr); 584 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 585 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 586 } 587 } 588 /* auxiliary matrices */ 589 if (!pcbddc->local_auxmat2) { 590 PetscScalar *marray; 591 592 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 593 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 594 marray += n_R*n_constraints; 595 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 596 } 597 598 /* Extract constraints on R nodes: C_{CR} */ 599 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 600 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 601 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 602 603 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 604 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 605 for (i=0;i<n_constraints;i++) { 606 const PetscScalar *row_cmat_values; 607 const PetscInt *row_cmat_indices; 608 PetscInt size_of_constraint,j; 609 610 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 611 for (j=0;j<size_of_constraint;j++) { 612 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 613 } 614 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 615 } 616 if (F) { 617 Mat B; 618 619 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 620 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 621 ierr = MatDestroy(&B);CHKERRQ(ierr); 622 } else { 623 PetscScalar *xarray; 624 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 625 for (i=0;i<n_constraints;i++) { 626 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 627 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 628 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 629 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 630 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 631 } 632 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 633 } 634 635 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 636 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 637 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 638 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 639 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 640 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 641 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 642 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 643 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 644 ierr = MatDestroy(&M2);CHKERRQ(ierr); 645 ierr = MatDestroy(&M3);CHKERRQ(ierr); 646 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 647 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 648 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 649 ierr = MatDestroy(&M1);CHKERRQ(ierr); 650 } 651 /* Get submatrices from subdomain matrix */ 652 if (n_vertices) { 653 Mat newmat; 654 IS is_aux; 655 PetscInt ibs,mbs; 656 PetscBool issbaij; 657 658 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 659 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 660 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 661 if (ibs != mbs) { /* need to convert to SEQAIJ */ 662 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 663 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 664 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 665 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 666 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 667 } else { 668 /* this is safe */ 669 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 670 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 671 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 672 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 673 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 674 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 675 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 676 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 677 } else { 678 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 679 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 680 } 681 } 682 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 683 } 684 685 /* Matrix of coarse basis functions (local) */ 686 if (pcbddc->coarse_phi_B) { 687 PetscInt on_B,on_primal,on_D=n_D; 688 if (pcbddc->coarse_phi_D) { 689 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 690 } 691 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 692 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 693 PetscScalar *marray; 694 695 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 696 ierr = PetscFree(marray);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 700 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 701 } 702 } 703 704 if (!pcbddc->coarse_phi_B) { 705 PetscScalar *marray; 706 707 n = n_B*pcbddc->local_primal_size; 708 if (pcbddc->switch_static || pcbddc->dbg_flag) { 709 n += n_D*pcbddc->local_primal_size; 710 } 711 if (!pcbddc->issym) { 712 n *= 2; 713 } 714 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 715 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 716 n = n_B*pcbddc->local_primal_size; 717 if (pcbddc->switch_static || pcbddc->dbg_flag) { 718 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 719 n += n_D*pcbddc->local_primal_size; 720 } 721 if (!pcbddc->issym) { 722 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 723 if (pcbddc->switch_static || pcbddc->dbg_flag) { 724 n = n_B*pcbddc->local_primal_size; 725 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 726 } 727 } else { 728 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 729 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 730 if (pcbddc->switch_static || pcbddc->dbg_flag) { 731 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 732 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 733 } 734 } 735 } 736 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 737 /* vertices */ 738 if (n_vertices) { 739 740 if (n_R) { 741 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 742 PetscBLASInt B_N,B_one = 1; 743 PetscScalar *x,*y; 744 745 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 746 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 747 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 748 if (F) { 749 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 750 } else { 751 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 752 for (i=0;i<n_vertices;i++) { 753 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 754 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 755 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 756 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 757 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 758 } 759 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 760 } 761 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 762 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 763 if (n_constraints) { 764 Mat B; 765 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 766 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 767 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 768 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 769 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 770 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 771 ierr = MatDestroy(&B);CHKERRQ(ierr); 772 } 773 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 774 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 775 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 776 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 777 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 778 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 779 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 780 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 781 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 782 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 783 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 784 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 785 } else { 786 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 787 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 788 } 789 /* coarse basis functions */ 790 for (i=0;i<n_vertices;i++) { 791 PetscScalar *y; 792 793 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 794 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 795 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 796 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 797 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 798 y[n_B*i+idx_V_B[i]] = 1.0; 799 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 800 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 801 802 if (pcbddc->switch_static || pcbddc->dbg_flag) { 803 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 804 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 805 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 806 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 807 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 808 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 809 } 810 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 811 } 812 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 813 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 814 } 815 816 if (n_constraints) { 817 Mat B; 818 819 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 820 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 821 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 822 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 823 if (n_vertices) { 824 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 825 } 826 ierr = MatDestroy(&B);CHKERRQ(ierr); 827 /* coarse basis functions */ 828 for (i=0;i<n_constraints;i++) { 829 PetscScalar *y; 830 831 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 832 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 833 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 834 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 835 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 836 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 837 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 838 839 if (pcbddc->switch_static || pcbddc->dbg_flag) { 840 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 841 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 842 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 844 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 845 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 846 } 847 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 848 } 849 } 850 851 /* compute other basis functions for non-symmetric problems */ 852 if (!pcbddc->issym) { 853 Mat B,X; 854 855 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 856 857 if (n_constraints) { 858 Mat S_CCT,B_C; 859 860 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 861 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 862 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 863 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 864 if (n_vertices) { 865 Mat B_V,S_VCT; 866 867 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 868 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 869 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 870 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 871 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 872 } 873 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 874 } 875 if (n_vertices && n_R) { 876 Mat A_VRT; 877 PetscBLASInt B_N,B_one = 1; 878 879 if (!n_constraints) { /* if there are no constraints, reset work */ 880 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 881 } 882 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 883 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 884 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 885 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 886 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 887 } 888 889 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 890 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 891 for (i=0;i<pcbddc->local_primal_size;i++) { 892 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 893 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 894 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 895 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 896 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 897 } 898 } else { 899 for (i=0;i<pcbddc->local_primal_size;i++) { 900 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 901 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 902 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 903 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 904 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 905 } 906 } 907 ierr = MatDestroy(&B);CHKERRQ(ierr); 908 /* coarse basis functions */ 909 for (i=0;i<pcbddc->local_primal_size;i++) { 910 PetscScalar *y; 911 912 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 913 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 914 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 915 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 917 if (i<n_vertices) { 918 y[n_B*i+idx_V_B[i]] = 1.0; 919 } 920 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 921 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 922 923 if (pcbddc->switch_static || pcbddc->dbg_flag) { 924 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 925 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 926 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 927 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 928 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 929 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 930 } 931 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 932 } 933 ierr = MatDestroy(&X);CHKERRQ(ierr); 934 } 935 936 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 937 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 938 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 939 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 940 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 941 /* Checking coarse_sub_mat and coarse basis functios */ 942 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 943 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 944 if (pcbddc->dbg_flag) { 945 Mat coarse_sub_mat; 946 Mat AUXMAT,TM1,TM2,TM3,TM4; 947 Mat coarse_phi_D,coarse_phi_B; 948 Mat coarse_psi_D,coarse_psi_B; 949 Mat A_II,A_BB,A_IB,A_BI; 950 MatType checkmattype=MATSEQAIJ; 951 PetscReal real_value; 952 953 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 954 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 955 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 956 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 957 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 958 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 959 if (unsymmetric_check) { 960 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 961 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 962 } 963 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 964 965 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 966 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 967 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 968 if (unsymmetric_check) { 969 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 970 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 971 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 972 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 973 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 974 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 975 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 976 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 977 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 978 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 979 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 980 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 981 } else { 982 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 983 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 984 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 985 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 986 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 987 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 988 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 989 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 990 } 991 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 992 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 993 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 994 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 995 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 996 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 997 998 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 999 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1000 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1001 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1002 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1003 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1004 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1005 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1006 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1007 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1008 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1009 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1010 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1011 if (unsymmetric_check) { 1012 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1013 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1014 } 1015 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1016 } 1017 1018 /* free memory */ 1019 ierr = PetscFree(work);CHKERRQ(ierr); 1020 if (n_vertices) { 1021 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1022 } 1023 if (n_constraints) { 1024 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1025 } 1026 /* get back data */ 1027 *coarse_submat_vals_n = coarse_submat_vals; 1028 PetscFunctionReturn(0); 1029 } 1030 1031 #undef __FUNCT__ 1032 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1033 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1034 { 1035 Mat *work_mat; 1036 IS isrow_s,iscol_s; 1037 PetscBool rsorted,csorted; 1038 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1039 PetscErrorCode ierr; 1040 1041 PetscFunctionBegin; 1042 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1043 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1044 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1045 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1046 1047 if (!rsorted) { 1048 const PetscInt *idxs; 1049 PetscInt *idxs_sorted,i; 1050 1051 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1052 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1053 for (i=0;i<rsize;i++) { 1054 idxs_perm_r[i] = i; 1055 } 1056 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1057 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1058 for (i=0;i<rsize;i++) { 1059 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1060 } 1061 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1062 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1063 } else { 1064 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1065 isrow_s = isrow; 1066 } 1067 1068 if (!csorted) { 1069 if (isrow == iscol) { 1070 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1071 iscol_s = isrow_s; 1072 } else { 1073 const PetscInt *idxs; 1074 PetscInt *idxs_sorted,i; 1075 1076 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1077 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1078 for (i=0;i<csize;i++) { 1079 idxs_perm_c[i] = i; 1080 } 1081 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1082 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1083 for (i=0;i<csize;i++) { 1084 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1085 } 1086 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1087 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1088 } 1089 } else { 1090 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1091 iscol_s = iscol; 1092 } 1093 1094 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1095 1096 if (!rsorted || !csorted) { 1097 Mat new_mat; 1098 IS is_perm_r,is_perm_c; 1099 1100 if (!rsorted) { 1101 PetscInt *idxs_r,i; 1102 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1103 for (i=0;i<rsize;i++) { 1104 idxs_r[idxs_perm_r[i]] = i; 1105 } 1106 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1107 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1108 } else { 1109 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1110 } 1111 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1112 1113 if (!csorted) { 1114 if (isrow_s == iscol_s) { 1115 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1116 is_perm_c = is_perm_r; 1117 } else { 1118 PetscInt *idxs_c,i; 1119 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1120 for (i=0;i<csize;i++) { 1121 idxs_c[idxs_perm_c[i]] = i; 1122 } 1123 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1124 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1125 } 1126 } else { 1127 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1128 } 1129 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1130 1131 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1132 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1133 work_mat[0] = new_mat; 1134 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1135 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1136 } 1137 1138 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1139 *B = work_mat[0]; 1140 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1141 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1142 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1148 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1149 { 1150 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1151 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1152 Mat new_mat; 1153 IS is_local,is_global; 1154 PetscInt local_size; 1155 PetscBool isseqaij; 1156 PetscErrorCode ierr; 1157 1158 PetscFunctionBegin; 1159 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1160 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1161 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1162 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1163 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1164 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1165 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1166 1167 /* check */ 1168 if (pcbddc->dbg_flag) { 1169 Vec x,x_change; 1170 PetscReal error; 1171 1172 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1173 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1174 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1175 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1176 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1177 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1178 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1179 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1180 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1181 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1182 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1183 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1184 ierr = VecDestroy(&x);CHKERRQ(ierr); 1185 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1186 } 1187 1188 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1189 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1190 if (isseqaij) { 1191 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1192 } else { 1193 Mat work_mat; 1194 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1195 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1196 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1197 } 1198 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1199 /* 1200 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1201 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1202 */ 1203 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1204 PetscFunctionReturn(0); 1205 } 1206 1207 #undef __FUNCT__ 1208 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1209 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1210 { 1211 PC_IS* pcis = (PC_IS*)(pc->data); 1212 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1213 IS is_aux1,is_aux2; 1214 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1215 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1216 PetscInt vbs,bs; 1217 PetscBT bitmask; 1218 PetscErrorCode ierr; 1219 1220 PetscFunctionBegin; 1221 /* 1222 No need to setup local scatters if 1223 - primal space is unchanged 1224 AND 1225 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1226 AND 1227 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1228 */ 1229 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1230 PetscFunctionReturn(0); 1231 } 1232 /* destroy old objects */ 1233 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1234 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1235 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1236 /* Set Non-overlapping dimensions */ 1237 n_B = pcis->n_B; n_D = pcis->n - n_B; 1238 n_vertices = pcbddc->n_actual_vertices; 1239 /* create auxiliary bitmask */ 1240 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1241 for (i=0;i<n_vertices;i++) { 1242 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1243 } 1244 1245 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1246 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1247 for (i=0, n_R=0; i<pcis->n; i++) { 1248 if (!PetscBTLookup(bitmask,i)) { 1249 idx_R_local[n_R] = i; 1250 n_R++; 1251 } 1252 } 1253 1254 /* Block code */ 1255 vbs = 1; 1256 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1257 if (bs>1 && !(n_vertices%bs)) { 1258 PetscBool is_blocked = PETSC_TRUE; 1259 PetscInt *vary; 1260 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1261 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1262 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1263 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1264 for (i=0; i<n_vertices; i++) { 1265 if (vary[i]!=0 && vary[i]!=bs) { 1266 is_blocked = PETSC_FALSE; 1267 break; 1268 } 1269 } 1270 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1271 vbs = bs; 1272 for (i=0;i<n_R/vbs;i++) { 1273 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1274 } 1275 } 1276 ierr = PetscFree(vary);CHKERRQ(ierr); 1277 } 1278 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1279 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1280 1281 /* print some info if requested */ 1282 if (pcbddc->dbg_flag) { 1283 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1284 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1285 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1287 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1288 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1289 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1290 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1291 } 1292 1293 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1294 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1295 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1296 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1297 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1298 for (i=0; i<n_D; i++) { 1299 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1300 } 1301 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1302 for (i=0, j=0; i<n_R; i++) { 1303 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1304 aux_array1[j++] = i; 1305 } 1306 } 1307 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1308 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1309 for (i=0, j=0; i<n_B; i++) { 1310 if (!PetscBTLookup(bitmask,is_indices[i])) { 1311 aux_array2[j++] = i; 1312 } 1313 } 1314 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1315 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1316 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1317 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1318 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1319 1320 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1321 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1322 for (i=0, j=0; i<n_R; i++) { 1323 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1324 aux_array1[j++] = i; 1325 } 1326 } 1327 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1328 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1329 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1330 } 1331 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1332 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1333 PetscFunctionReturn(0); 1334 } 1335 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1339 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1340 { 1341 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1342 PC_IS *pcis = (PC_IS*)pc->data; 1343 PC pc_temp; 1344 Mat A_RR; 1345 MatReuse reuse; 1346 PetscScalar m_one = -1.0; 1347 PetscReal value; 1348 PetscInt n_D,n_R,ibs,mbs; 1349 PetscBool use_exact,use_exact_reduced,issbaij; 1350 PetscErrorCode ierr; 1351 /* prefixes stuff */ 1352 char dir_prefix[256],neu_prefix[256],str_level[16]; 1353 size_t len; 1354 1355 PetscFunctionBegin; 1356 1357 /* compute prefixes */ 1358 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1359 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1360 if (!pcbddc->current_level) { 1361 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1362 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1363 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1364 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1365 } else { 1366 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1367 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1368 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1369 len -= 15; /* remove "pc_bddc_coarse_" */ 1370 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1371 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1372 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1373 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1374 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1375 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1376 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1377 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1378 } 1379 1380 /* DIRICHLET PROBLEM */ 1381 if (dirichlet) { 1382 /* Matrix for Dirichlet problem is pcis->A_II */ 1383 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1384 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1385 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1386 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1387 /* default */ 1388 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1389 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1390 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1391 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1392 if (issbaij) { 1393 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1394 } else { 1395 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1396 } 1397 /* Allow user's customization */ 1398 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1399 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1400 } 1401 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1402 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1403 if (!n_D) { 1404 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1405 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1406 } 1407 /* Set Up KSP for Dirichlet problem of BDDC */ 1408 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1409 /* set ksp_D into pcis data */ 1410 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1411 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1412 pcis->ksp_D = pcbddc->ksp_D; 1413 } 1414 1415 /* NEUMANN PROBLEM */ 1416 A_RR = 0; 1417 if (neumann) { 1418 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1419 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1420 if (pcbddc->ksp_R) { /* already created ksp */ 1421 PetscInt nn_R; 1422 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1423 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1424 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1425 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1426 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1427 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1428 reuse = MAT_INITIAL_MATRIX; 1429 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1430 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1431 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1432 reuse = MAT_INITIAL_MATRIX; 1433 } else { /* safe to reuse the matrix */ 1434 reuse = MAT_REUSE_MATRIX; 1435 } 1436 } 1437 /* last check */ 1438 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1439 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1440 reuse = MAT_INITIAL_MATRIX; 1441 } 1442 } else { /* first time, so we need to create the matrix */ 1443 reuse = MAT_INITIAL_MATRIX; 1444 } 1445 /* extract A_RR */ 1446 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1447 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1448 if (ibs != mbs) { 1449 Mat newmat; 1450 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1451 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1452 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1453 } else { 1454 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1455 } 1456 if (!pcbddc->ksp_R) { /* create object if not present */ 1457 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1458 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1459 /* default */ 1460 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1461 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1462 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1463 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1464 if (issbaij) { 1465 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1466 } else { 1467 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1468 } 1469 /* Allow user's customization */ 1470 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1471 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1472 } 1473 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1474 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1475 if (!n_R) { 1476 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1477 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1478 } 1479 /* Set Up KSP for Neumann problem of BDDC */ 1480 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1481 } 1482 1483 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1484 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1485 if (pcbddc->dbg_flag) { 1486 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1487 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1488 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1489 } 1490 if (dirichlet) { /* Dirichlet */ 1491 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1492 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1493 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1494 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1495 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1496 /* need to be adapted? */ 1497 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1498 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1499 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1500 /* print info */ 1501 if (pcbddc->dbg_flag) { 1502 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1503 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1504 } 1505 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1506 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1507 } 1508 } 1509 if (neumann) { /* Neumann */ 1510 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1511 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1512 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1513 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1514 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1515 /* need to be adapted? */ 1516 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1517 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1518 /* print info */ 1519 if (pcbddc->dbg_flag) { 1520 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1521 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1522 } 1523 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1524 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1525 } 1526 } 1527 } 1528 /* free Neumann problem's matrix */ 1529 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1535 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1536 { 1537 PetscErrorCode ierr; 1538 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1539 1540 PetscFunctionBegin; 1541 if (applytranspose) { 1542 if (pcbddc->local_auxmat1) { 1543 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1544 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1545 } 1546 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1547 } else { 1548 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1549 if (pcbddc->local_auxmat1) { 1550 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1551 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1552 } 1553 } 1554 PetscFunctionReturn(0); 1555 } 1556 1557 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1560 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1561 { 1562 PetscErrorCode ierr; 1563 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1564 PC_IS* pcis = (PC_IS*) (pc->data); 1565 const PetscScalar zero = 0.0; 1566 1567 PetscFunctionBegin; 1568 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1569 if (applytranspose) { 1570 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1571 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1572 } else { 1573 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1574 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1575 } 1576 /* start communications from local primal nodes to rhs of coarse solver */ 1577 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1578 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1579 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1580 1581 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1582 /* TODO remove null space when doing multilevel */ 1583 if (pcbddc->coarse_ksp) { 1584 if (applytranspose) { 1585 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1586 } else { 1587 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1588 } 1589 } 1590 1591 /* Local solution on R nodes */ 1592 if (pcis->n) { 1593 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1594 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1595 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1596 if (pcbddc->switch_static) { 1597 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1598 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1599 } 1600 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1601 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1602 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1603 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1604 if (pcbddc->switch_static) { 1605 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1606 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1607 } 1608 } 1609 1610 /* communications from coarse sol to local primal nodes */ 1611 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1612 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1613 1614 /* Sum contributions from two levels */ 1615 if (applytranspose) { 1616 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1617 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1618 } else { 1619 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1620 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1621 } 1622 PetscFunctionReturn(0); 1623 } 1624 1625 #undef __FUNCT__ 1626 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1627 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1628 { 1629 PetscErrorCode ierr; 1630 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1631 PetscScalar *array; 1632 Vec from,to; 1633 1634 PetscFunctionBegin; 1635 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1636 from = pcbddc->coarse_vec; 1637 to = pcbddc->vec1_P; 1638 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1639 Vec tvec; 1640 1641 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1642 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1643 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1644 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1645 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1646 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1647 } 1648 } else { /* from local to global -> put data in coarse right hand side */ 1649 from = pcbddc->vec1_P; 1650 to = pcbddc->coarse_vec; 1651 } 1652 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1653 PetscFunctionReturn(0); 1654 } 1655 1656 #undef __FUNCT__ 1657 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1658 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1659 { 1660 PetscErrorCode ierr; 1661 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1662 PetscScalar *array; 1663 Vec from,to; 1664 1665 PetscFunctionBegin; 1666 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1667 from = pcbddc->coarse_vec; 1668 to = pcbddc->vec1_P; 1669 } else { /* from local to global -> put data in coarse right hand side */ 1670 from = pcbddc->vec1_P; 1671 to = pcbddc->coarse_vec; 1672 } 1673 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1674 if (smode == SCATTER_FORWARD) { 1675 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1676 Vec tvec; 1677 1678 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1679 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1680 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1681 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1682 } 1683 } else { 1684 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1685 ierr = VecResetArray(from);CHKERRQ(ierr); 1686 } 1687 } 1688 PetscFunctionReturn(0); 1689 } 1690 1691 /* uncomment for testing purposes */ 1692 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1693 #undef __FUNCT__ 1694 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1695 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1696 { 1697 PetscErrorCode ierr; 1698 PC_IS* pcis = (PC_IS*)(pc->data); 1699 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1700 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1701 /* one and zero */ 1702 PetscScalar one=1.0,zero=0.0; 1703 /* space to store constraints and their local indices */ 1704 PetscScalar *temp_quadrature_constraint; 1705 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1706 /* iterators */ 1707 PetscInt i,j,k,total_counts,temp_start_ptr; 1708 /* BLAS integers */ 1709 PetscBLASInt lwork,lierr; 1710 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1711 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1712 /* reuse */ 1713 PetscInt olocal_primal_size; 1714 PetscInt *oprimal_indices_local_idxs; 1715 /* change of basis */ 1716 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1717 PetscBool boolforchange,qr_needed; 1718 PetscBT touched,change_basis,qr_needed_idx; 1719 /* auxiliary stuff */ 1720 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1721 PetscInt ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL; 1722 PetscScalar *temp_quadrature_constraint_work=NULL; 1723 /* some quantities */ 1724 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1725 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1726 1727 PetscFunctionBegin; 1728 /* Destroy Mat objects computed previously */ 1729 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1730 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1731 1732 /* print some info */ 1733 if (pcbddc->dbg_flag) { 1734 IS vertices; 1735 PetscInt nv,nedges,nfaces; 1736 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1737 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1738 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1739 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1740 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1741 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1742 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_edges);CHKERRQ(ierr); 1743 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nedges,pcbddc->use_faces);CHKERRQ(ierr); 1744 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1745 } 1746 1747 if (!pcbddc->adaptive_selection) { 1748 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1749 MatNullSpace nearnullsp; 1750 const Vec *nearnullvecs; 1751 Vec *localnearnullsp; 1752 PetscScalar *array; 1753 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1754 PetscBool nnsp_has_cnst; 1755 /* LAPACK working arrays for SVD or POD */ 1756 PetscBool skip_lapack; 1757 PetscScalar *work; 1758 PetscReal *singular_vals; 1759 #if defined(PETSC_USE_COMPLEX) 1760 PetscReal *rwork; 1761 #endif 1762 #if defined(PETSC_MISSING_LAPACK_GESVD) 1763 PetscScalar *temp_basis,*correlation_mat; 1764 #else 1765 PetscBLASInt dummy_int; 1766 PetscScalar dummy_scalar; 1767 #endif 1768 1769 /* Get index sets for faces, edges and vertices from graph */ 1770 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1771 /* free unneeded index sets */ 1772 if (!pcbddc->use_vertices) { 1773 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1774 } 1775 if (!pcbddc->use_edges) { 1776 for (i=0;i<n_ISForEdges;i++) { 1777 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1778 } 1779 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1780 n_ISForEdges = 0; 1781 } 1782 if (!pcbddc->use_faces) { 1783 for (i=0;i<n_ISForFaces;i++) { 1784 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1785 } 1786 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1787 n_ISForFaces = 0; 1788 } 1789 /* HACKS (the following two blocks of code) */ 1790 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1791 pcbddc->use_change_of_basis = PETSC_TRUE; 1792 if (!ISForEdges) { 1793 pcbddc->use_change_on_faces = PETSC_TRUE; 1794 } 1795 } 1796 if (pcbddc->NullSpace) { 1797 /* use_change_of_basis should be consistent among processors */ 1798 PetscBool tbool[2],gbool[2]; 1799 tbool [0] = pcbddc->use_change_of_basis; 1800 tbool [1] = pcbddc->use_change_on_faces; 1801 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1802 pcbddc->use_change_of_basis = gbool[0]; 1803 pcbddc->use_change_on_faces = gbool[1]; 1804 } 1805 1806 /* check if near null space is attached to global mat */ 1807 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1808 if (nearnullsp) { 1809 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1810 /* remove any stored info */ 1811 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1812 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1813 /* store information for BDDC solver reuse */ 1814 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1815 pcbddc->onearnullspace = nearnullsp; 1816 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1817 for (i=0;i<nnsp_size;i++) { 1818 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1819 } 1820 } else { /* if near null space is not provided BDDC uses constants by default */ 1821 nnsp_size = 0; 1822 nnsp_has_cnst = PETSC_TRUE; 1823 } 1824 /* get max number of constraints on a single cc */ 1825 max_constraints = nnsp_size; 1826 if (nnsp_has_cnst) max_constraints++; 1827 1828 /* 1829 Evaluate maximum storage size needed by the procedure 1830 - temp_indices will contain start index of each constraint stored as follows 1831 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1832 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1833 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1834 */ 1835 total_counts = n_ISForFaces+n_ISForEdges; 1836 total_counts *= max_constraints; 1837 n_vertices = 0; 1838 if (ISForVertices) { 1839 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1840 } 1841 total_counts += n_vertices; 1842 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1843 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1844 total_counts = 0; 1845 max_size_of_constraint = 0; 1846 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1847 if (i<n_ISForEdges) { 1848 used_IS = &ISForEdges[i]; 1849 } else { 1850 used_IS = &ISForFaces[i-n_ISForEdges]; 1851 } 1852 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1853 total_counts += j; 1854 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1855 } 1856 total_counts *= max_constraints; 1857 total_counts += n_vertices; 1858 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1859 /* get local part of global near null space vectors */ 1860 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1861 for (k=0;k<nnsp_size;k++) { 1862 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1863 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1864 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1865 } 1866 1867 /* whether or not to skip lapack calls */ 1868 skip_lapack = PETSC_TRUE; 1869 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1870 1871 /* allocate some auxiliary stuff */ 1872 if (!skip_lapack || pcbddc->use_qr_single) { 1873 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1874 } else { 1875 gidxs = NULL; 1876 permutation = NULL; 1877 temp_indices_to_constraint_work = NULL; 1878 temp_quadrature_constraint_work = NULL; 1879 } 1880 1881 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1882 if (!skip_lapack) { 1883 PetscScalar temp_work; 1884 1885 #if defined(PETSC_MISSING_LAPACK_GESVD) 1886 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1887 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1888 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1889 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1890 #if defined(PETSC_USE_COMPLEX) 1891 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1892 #endif 1893 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1894 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1895 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1896 lwork = -1; 1897 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1898 #if !defined(PETSC_USE_COMPLEX) 1899 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1900 #else 1901 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1902 #endif 1903 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1904 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1905 #else /* on missing GESVD */ 1906 /* SVD */ 1907 PetscInt max_n,min_n; 1908 max_n = max_size_of_constraint; 1909 min_n = max_constraints; 1910 if (max_size_of_constraint < max_constraints) { 1911 min_n = max_size_of_constraint; 1912 max_n = max_constraints; 1913 } 1914 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1915 #if defined(PETSC_USE_COMPLEX) 1916 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1917 #endif 1918 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1919 lwork = -1; 1920 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1921 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1922 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1923 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1924 #if !defined(PETSC_USE_COMPLEX) 1925 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 1926 #else 1927 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 1928 #endif 1929 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1930 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1931 #endif /* on missing GESVD */ 1932 /* Allocate optimal workspace */ 1933 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1934 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1935 } 1936 /* Now we can loop on constraining sets */ 1937 total_counts = 0; 1938 temp_indices[0] = 0; 1939 /* vertices */ 1940 if (ISForVertices) { 1941 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1942 if (nnsp_has_cnst) { /* consider all vertices */ 1943 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1944 for (i=0;i<n_vertices;i++) { 1945 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1946 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1947 total_counts++; 1948 } 1949 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1950 PetscBool used_vertex; 1951 for (i=0;i<n_vertices;i++) { 1952 used_vertex = PETSC_FALSE; 1953 k = 0; 1954 while (!used_vertex && k<nnsp_size) { 1955 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1956 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1957 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1958 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1959 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1960 total_counts++; 1961 used_vertex = PETSC_TRUE; 1962 } 1963 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1964 k++; 1965 } 1966 } 1967 } 1968 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1969 n_vertices = total_counts; 1970 } 1971 1972 /* edges and faces */ 1973 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1974 if (ncc<n_ISForEdges) { 1975 used_IS = &ISForEdges[ncc]; 1976 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1977 } else { 1978 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1979 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1980 } 1981 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1982 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1983 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1984 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1985 /* change of basis should not be performed on local periodic nodes */ 1986 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1987 if (nnsp_has_cnst) { 1988 PetscScalar quad_value; 1989 temp_constraints++; 1990 if (!pcbddc->use_nnsp_true) { 1991 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1992 } else { 1993 quad_value = 1.0; 1994 } 1995 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1996 for (j=0;j<size_of_constraint;j++) { 1997 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1998 } 1999 /* sort by global ordering if using lapack subroutines (not needed!) */ 2000 if (!skip_lapack || pcbddc->use_qr_single) { 2001 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2002 for (j=0;j<size_of_constraint;j++) { 2003 permutation[j]=j; 2004 } 2005 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2006 for (j=0;j<size_of_constraint;j++) { 2007 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 2008 } 2009 for (j=0;j<size_of_constraint;j++) { 2010 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2011 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2012 } 2013 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2014 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2015 } 2016 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2017 total_counts++; 2018 } 2019 for (k=0;k<nnsp_size;k++) { 2020 PetscReal real_value; 2021 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2022 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2023 for (j=0;j<size_of_constraint;j++) { 2024 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2025 } 2026 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2027 /* check if array is null on the connected component */ 2028 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2029 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2030 if (real_value > 0.0) { /* keep indices and values */ 2031 /* sort by global ordering if using lapack subroutines */ 2032 if (!skip_lapack || pcbddc->use_qr_single) { 2033 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2034 for (j=0;j<size_of_constraint;j++) { 2035 permutation[j]=j; 2036 } 2037 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2038 for (j=0;j<size_of_constraint;j++) { 2039 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2040 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2041 } 2042 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2043 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2044 } 2045 temp_constraints++; 2046 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2047 total_counts++; 2048 } 2049 } 2050 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2051 valid_constraints = temp_constraints; 2052 if (!pcbddc->use_nnsp_true && temp_constraints) { 2053 if (temp_constraints == 1) { /* just normalize the constraint */ 2054 PetscScalar norm; 2055 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2056 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2057 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2058 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2059 } else { /* perform SVD */ 2060 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2061 2062 #if defined(PETSC_MISSING_LAPACK_GESVD) 2063 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2064 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2065 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2066 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2067 from that computed using LAPACKgesvd 2068 -> This is due to a different computation of eigenvectors in LAPACKheev 2069 -> The quality of the POD-computed basis will be the same */ 2070 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2071 /* Store upper triangular part of correlation matrix */ 2072 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2073 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2074 for (j=0;j<temp_constraints;j++) { 2075 for (k=0;k<j+1;k++) { 2076 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one)); 2077 } 2078 } 2079 /* compute eigenvalues and eigenvectors of correlation matrix */ 2080 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2081 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2082 #if !defined(PETSC_USE_COMPLEX) 2083 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2084 #else 2085 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2086 #endif 2087 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2088 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2089 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2090 j = 0; 2091 while (j < temp_constraints && singular_vals[j] < tol) j++; 2092 total_counts = total_counts-j; 2093 valid_constraints = temp_constraints-j; 2094 /* scale and copy POD basis into used quadrature memory */ 2095 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2096 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2097 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2098 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2099 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2100 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2101 if (j<temp_constraints) { 2102 PetscInt ii; 2103 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2104 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2105 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2106 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2107 for (k=0;k<temp_constraints-j;k++) { 2108 for (ii=0;ii<size_of_constraint;ii++) { 2109 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2110 } 2111 } 2112 } 2113 #else /* on missing GESVD */ 2114 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2115 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2116 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2117 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2118 #if !defined(PETSC_USE_COMPLEX) 2119 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2120 #else 2121 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2122 #endif 2123 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2124 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2125 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2126 k = temp_constraints; 2127 if (k > size_of_constraint) k = size_of_constraint; 2128 j = 0; 2129 while (j < k && singular_vals[k-j-1] < tol) j++; 2130 valid_constraints = k-j; 2131 total_counts = total_counts-temp_constraints+valid_constraints; 2132 #endif /* on missing GESVD */ 2133 } 2134 } 2135 /* setting change_of_basis flag is safe now */ 2136 if (boolforchange) { 2137 for (j=0;j<valid_constraints;j++) { 2138 PetscBTSet(change_basis,total_counts-j-1); 2139 } 2140 } 2141 } 2142 /* free workspace */ 2143 if (!skip_lapack || pcbddc->use_qr_single) { 2144 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2145 } 2146 if (!skip_lapack) { 2147 ierr = PetscFree(work);CHKERRQ(ierr); 2148 #if defined(PETSC_USE_COMPLEX) 2149 ierr = PetscFree(rwork);CHKERRQ(ierr); 2150 #endif 2151 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2152 #if defined(PETSC_MISSING_LAPACK_GESVD) 2153 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2154 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2155 #endif 2156 } 2157 for (k=0;k<nnsp_size;k++) { 2158 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2159 } 2160 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2161 /* free index sets of faces, edges and vertices */ 2162 for (i=0;i<n_ISForFaces;i++) { 2163 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2164 } 2165 if (n_ISForFaces) { 2166 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2167 } 2168 for (i=0;i<n_ISForEdges;i++) { 2169 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2170 } 2171 if (n_ISForEdges) { 2172 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2173 } 2174 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2175 } else { 2176 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2177 PetscInt cum = 0; 2178 2179 total_counts = 0; 2180 n_vertices = 0; 2181 if (sub_schurs->is_Ej_com) { 2182 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2183 } 2184 max_constraints = 0; 2185 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2186 total_counts += pcbddc->adaptive_constraints_n[i]; 2187 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2188 } 2189 temp_indices = pcbddc->adaptive_constraints_ptrs; 2190 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2191 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2192 2193 #if 0 2194 printf("Found %d totals\n",total_counts); 2195 for (i=0;i<total_counts;i++) { 2196 printf("const %d, start %d",i,temp_indices[i]); 2197 printf(" end %d:\n",temp_indices[i+1]); 2198 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2199 printf(" idxs %d",temp_indices_to_constraint[j]); 2200 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2201 } 2202 } 2203 for (i=0;i<n_vertices;i++) { 2204 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]); 2205 } 2206 for (i=0;i<sub_schurs->n_subs;i++) { 2207 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2208 } 2209 #endif 2210 2211 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2212 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2213 /* Change of basis */ 2214 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2215 if (pcbddc->use_change_of_basis) { 2216 cum = n_vertices; 2217 for (i=0;i<sub_schurs->n_subs;i++) { 2218 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2219 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2220 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2221 } 2222 } 2223 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2224 } 2225 } 2226 } 2227 2228 /* map temp_indices_to_constraint in boundary numbering */ 2229 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2230 if (i != temp_indices[total_counts]) { 2231 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2232 } 2233 2234 /* set quantities in pcbddc data structure and store previous primal size */ 2235 /* n_vertices defines the number of subdomain corners in the primal space */ 2236 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2237 olocal_primal_size = pcbddc->local_primal_size; 2238 pcbddc->local_primal_size = total_counts; 2239 pcbddc->n_vertices = n_vertices; 2240 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2241 2242 /* Create constraint matrix */ 2243 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2244 /* so we need to set it up properly either with or without change of basis */ 2245 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2246 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2247 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2248 /* array to compute a local numbering of constraints : vertices first then constraints */ 2249 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2250 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2251 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2252 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2253 /* auxiliary stuff for basis change */ 2254 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2255 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2256 2257 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2258 total_primal_vertices=0; 2259 for (i=0;i<pcbddc->local_primal_size;i++) { 2260 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2261 if (size_of_constraint == 1) { 2262 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2263 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2264 aux_primal_minloc[total_primal_vertices]=0; 2265 total_primal_vertices++; 2266 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2267 PetscInt min_loc,min_index; 2268 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2269 /* find first untouched local node */ 2270 k = 0; 2271 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2272 min_index = global_indices[k]; 2273 min_loc = k; 2274 /* search the minimum among global nodes already untouched on the cc */ 2275 for (k=1;k<size_of_constraint;k++) { 2276 /* there can be more than one constraint on a single connected component */ 2277 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2278 min_index = global_indices[k]; 2279 min_loc = k; 2280 } 2281 } 2282 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2283 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2284 aux_primal_minloc[total_primal_vertices]=min_loc; 2285 total_primal_vertices++; 2286 } 2287 } 2288 /* determine if a QR strategy is needed for change of basis */ 2289 qr_needed = PETSC_FALSE; 2290 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2291 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2292 if (PetscBTLookup(change_basis,i)) { 2293 if (!pcbddc->use_qr_single) { 2294 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2295 j = 0; 2296 for (k=0;k<size_of_constraint;k++) { 2297 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2298 j++; 2299 } 2300 } 2301 /* found more than one primal dof on the cc */ 2302 if (j > 1) { 2303 PetscBTSet(qr_needed_idx,i); 2304 qr_needed = PETSC_TRUE; 2305 } 2306 } else { 2307 PetscBTSet(qr_needed_idx,i); 2308 qr_needed = PETSC_TRUE; 2309 } 2310 } 2311 } 2312 /* free workspace */ 2313 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2314 2315 /* permute indices in order to have a sorted set of vertices */ 2316 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2317 2318 /* nonzero structure of constraint matrix */ 2319 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2320 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2321 j=total_primal_vertices; 2322 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2323 if (!PetscBTLookup(change_basis,i)) { 2324 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2325 j++; 2326 } 2327 } 2328 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2329 ierr = PetscFree(nnz);CHKERRQ(ierr); 2330 /* set values in constraint matrix */ 2331 for (i=0;i<total_primal_vertices;i++) { 2332 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2333 } 2334 total_counts = total_primal_vertices; 2335 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2336 if (!PetscBTLookup(change_basis,i)) { 2337 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2338 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2339 total_counts++; 2340 } 2341 } 2342 /* assembling */ 2343 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2344 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2345 /* 2346 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2347 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2348 */ 2349 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2350 if (pcbddc->use_change_of_basis) { 2351 /* dual and primal dofs on a single cc */ 2352 PetscInt dual_dofs,primal_dofs; 2353 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2354 PetscInt primal_counter; 2355 /* working stuff for GEQRF */ 2356 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2357 PetscBLASInt lqr_work; 2358 /* working stuff for UNGQR */ 2359 PetscScalar *gqr_work,lgqr_work_t; 2360 PetscBLASInt lgqr_work; 2361 /* working stuff for TRTRS */ 2362 PetscScalar *trs_rhs; 2363 PetscBLASInt Blas_NRHS; 2364 /* pointers for values insertion into change of basis matrix */ 2365 PetscInt *start_rows,*start_cols; 2366 PetscScalar *start_vals; 2367 /* working stuff for values insertion */ 2368 PetscBT is_primal; 2369 /* matrix sizes */ 2370 PetscInt global_size,local_size; 2371 /* temporary change of basis */ 2372 Mat localChangeOfBasisMatrix; 2373 /* extra space for debugging */ 2374 PetscScalar *dbg_work; 2375 2376 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2377 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2378 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2379 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2380 /* nonzeros for local mat */ 2381 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2382 for (i=0;i<pcis->n;i++) nnz[i]=1; 2383 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2384 if (PetscBTLookup(change_basis,i)) { 2385 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2386 if (PetscBTLookup(qr_needed_idx,i)) { 2387 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2388 } else { 2389 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2; 2390 /* get local primal index on the cc */ 2391 j = 0; 2392 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2393 nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2394 } 2395 } 2396 } 2397 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2398 ierr = PetscFree(nnz);CHKERRQ(ierr); 2399 /* Set initial identity in the matrix */ 2400 for (i=0;i<pcis->n;i++) { 2401 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2402 } 2403 2404 if (pcbddc->dbg_flag) { 2405 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2406 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2407 } 2408 2409 2410 /* Now we loop on the constraints which need a change of basis */ 2411 /* 2412 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2413 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2414 2415 Basic blocks of change of basis matrix T computed by 2416 2417 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2418 2419 | 1 0 ... 0 s_1/S | 2420 | 0 1 ... 0 s_2/S | 2421 | ... | 2422 | 0 ... 1 s_{n-1}/S | 2423 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2424 2425 with S = \sum_{i=1}^n s_i^2 2426 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2427 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2428 2429 - QR decomposition of constraints otherwise 2430 */ 2431 if (qr_needed) { 2432 /* space to store Q */ 2433 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2434 /* first we issue queries for optimal work */ 2435 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2436 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2437 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2438 lqr_work = -1; 2439 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2440 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2441 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2442 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2443 lgqr_work = -1; 2444 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2445 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2446 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2447 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2448 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2449 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2450 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2451 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2452 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2453 /* array to store scaling factors for reflectors */ 2454 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2455 /* array to store rhs and solution of triangular solver */ 2456 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2457 /* allocating workspace for check */ 2458 if (pcbddc->dbg_flag) { 2459 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2460 } 2461 } 2462 /* array to store whether a node is primal or not */ 2463 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2464 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2465 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2466 if (i != total_primal_vertices) { 2467 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2468 } 2469 for (i=0;i<total_primal_vertices;i++) { 2470 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2471 } 2472 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2473 2474 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2475 /* -> using implicit ordering contained in temp_indices data */ 2476 total_counts = pcbddc->n_vertices; 2477 primal_counter = total_counts; 2478 while (total_counts<pcbddc->local_primal_size) { 2479 primal_dofs = 1; 2480 if (PetscBTLookup(change_basis,total_counts)) { 2481 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2482 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) { 2483 primal_dofs++; 2484 } 2485 /* get constraint info */ 2486 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2487 dual_dofs = size_of_constraint-primal_dofs; 2488 2489 if (pcbddc->dbg_flag) { 2490 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2491 } 2492 2493 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2494 2495 /* copy quadrature constraints for change of basis check */ 2496 if (pcbddc->dbg_flag) { 2497 ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2498 } 2499 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2500 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2501 2502 /* compute QR decomposition of constraints */ 2503 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2504 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2505 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2506 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2507 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2508 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2509 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2510 2511 /* explictly compute R^-T */ 2512 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2513 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2514 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2515 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2516 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2517 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2518 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2519 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2520 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2521 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2522 2523 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2524 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2525 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2526 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2527 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2528 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2529 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2530 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2531 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2532 2533 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2534 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2535 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2537 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2538 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2539 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2540 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2541 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2542 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2543 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2544 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2545 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2546 2547 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2548 start_rows = &temp_indices_to_constraint[temp_indices[total_counts]]; 2549 /* insert cols for primal dofs */ 2550 for (j=0;j<primal_dofs;j++) { 2551 start_vals = &qr_basis[j*size_of_constraint]; 2552 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2553 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2554 } 2555 /* insert cols for dual dofs */ 2556 for (j=0,k=0;j<dual_dofs;k++) { 2557 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2558 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2559 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k]; 2560 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2561 j++; 2562 } 2563 } 2564 2565 /* check change of basis */ 2566 if (pcbddc->dbg_flag) { 2567 PetscInt ii,jj; 2568 PetscBool valid_qr=PETSC_TRUE; 2569 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2570 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2571 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2572 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2573 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2574 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2575 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2576 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2577 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2578 for (jj=0;jj<size_of_constraint;jj++) { 2579 for (ii=0;ii<primal_dofs;ii++) { 2580 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2581 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2582 } 2583 } 2584 if (!valid_qr) { 2585 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2586 for (jj=0;jj<size_of_constraint;jj++) { 2587 for (ii=0;ii<primal_dofs;ii++) { 2588 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2589 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2590 } 2591 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2592 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2593 } 2594 } 2595 } 2596 } else { 2597 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2598 } 2599 } 2600 } else { /* simple transformation block */ 2601 PetscInt row,col; 2602 PetscScalar val,norm; 2603 2604 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2605 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2606 for (j=0;j<size_of_constraint;j++) { 2607 PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2608 row = temp_indices_to_constraint[temp_indices[total_counts]+j]; 2609 if (!PetscBTLookup(is_primal,row_B)) { 2610 col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2611 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2612 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2613 } else { 2614 for (k=0;k<size_of_constraint;k++) { 2615 col = temp_indices_to_constraint[temp_indices[total_counts]+k]; 2616 if (row != col) { 2617 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2618 } else { 2619 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2620 } 2621 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2622 } 2623 } 2624 } 2625 if (pcbddc->dbg_flag) { 2626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2627 } 2628 } 2629 /* increment primal counter */ 2630 primal_counter += primal_dofs; 2631 } else { 2632 if (pcbddc->dbg_flag) { 2633 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2634 } 2635 } 2636 /* increment constraint counter total_counts */ 2637 total_counts += primal_dofs; 2638 } 2639 2640 /* free workspace */ 2641 if (qr_needed) { 2642 if (pcbddc->dbg_flag) { 2643 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2644 } 2645 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2646 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2647 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2648 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2649 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2650 } 2651 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2652 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2653 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2654 2655 /* assembling of global change of variable */ 2656 { 2657 Mat tmat; 2658 PetscInt bs; 2659 2660 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2661 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2662 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2663 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2664 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2665 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2666 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2667 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2668 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2669 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2670 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2671 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2672 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2673 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2674 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2675 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2676 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2677 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2678 } 2679 /* check */ 2680 if (pcbddc->dbg_flag) { 2681 PetscReal error; 2682 Vec x,x_change; 2683 2684 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2685 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2686 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2687 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2688 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2689 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2690 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2691 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2692 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2693 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2694 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2695 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2696 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2697 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2698 ierr = VecDestroy(&x);CHKERRQ(ierr); 2699 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2700 } 2701 2702 /* adapt sub_schurs computed (if any) */ 2703 if (pcbddc->use_deluxe_scaling) { 2704 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2705 if (sub_schurs->n_subs_par_g) { 2706 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented"); 2707 } 2708 if (sub_schurs->S_Ej_all) { 2709 Mat S_1,S_2,tmat; 2710 IS is_all_N; 2711 2712 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2713 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr); 2714 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2715 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2716 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2717 sub_schurs->S_Ej_all = S_1; 2718 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2719 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2720 sub_schurs->sum_S_Ej_all = S_2; 2721 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2722 } 2723 } 2724 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2725 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2726 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2727 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2728 } 2729 2730 /* set up change of basis context */ 2731 if (pcbddc->ChangeOfBasisMatrix) { 2732 PCBDDCChange_ctx change_ctx; 2733 2734 if (!pcbddc->new_global_mat) { 2735 PetscInt global_size,local_size; 2736 2737 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2738 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2739 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2740 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2741 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2742 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2743 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2744 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2745 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2746 } else { 2747 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2748 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2749 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2750 } 2751 if (!pcbddc->user_ChangeOfBasisMatrix) { 2752 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2753 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2754 } else { 2755 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2756 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2757 } 2758 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2759 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2760 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2761 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2762 } 2763 2764 /* get indices in local ordering for vertices and constraints */ 2765 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2766 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2767 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2768 } 2769 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2770 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2771 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2772 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2773 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2774 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2775 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2776 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2777 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2778 /* set quantities in PCBDDC data struct */ 2779 pcbddc->n_actual_vertices = i; 2780 /* check if a new primal space has been introduced */ 2781 pcbddc->new_primal_space_local = PETSC_TRUE; 2782 if (olocal_primal_size == pcbddc->local_primal_size) { 2783 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2784 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2785 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2786 } 2787 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2788 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2789 2790 /* flush dbg viewer */ 2791 if (pcbddc->dbg_flag) { 2792 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2793 } 2794 2795 /* free workspace */ 2796 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2797 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2798 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2799 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2800 if (!pcbddc->adaptive_selection) { 2801 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2802 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2803 } else { 2804 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2805 pcbddc->adaptive_constraints_ptrs, 2806 pcbddc->adaptive_constraints_idxs, 2807 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2808 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2809 } 2810 PetscFunctionReturn(0); 2811 } 2812 2813 #undef __FUNCT__ 2814 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2815 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2816 { 2817 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2818 PC_IS *pcis = (PC_IS*)pc->data; 2819 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2820 PetscInt ierr,i,vertex_size; 2821 PetscViewer viewer=pcbddc->dbg_viewer; 2822 2823 PetscFunctionBegin; 2824 /* Reset previously computed graph */ 2825 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2826 /* Init local Graph struct */ 2827 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2828 2829 /* Check validity of the csr graph passed in by the user */ 2830 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2831 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2832 } 2833 2834 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2835 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2836 PetscInt *xadj,*adjncy; 2837 PetscInt nvtxs; 2838 2839 if (pcbddc->use_local_adj) { 2840 PetscBool flg_row=PETSC_FALSE; 2841 2842 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2843 if (flg_row) { 2844 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2845 pcbddc->computed_rowadj = PETSC_TRUE; 2846 } 2847 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2848 } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */ 2849 IS is_dummy; 2850 ISLocalToGlobalMapping l2gmap_dummy; 2851 PetscInt j,sum; 2852 PetscInt *cxadj,*cadjncy; 2853 const PetscInt *idxs; 2854 PCBDDCGraph graph; 2855 PetscBT is_on_boundary; 2856 2857 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2858 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2859 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2860 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2861 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2862 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2863 graph->xadj = xadj; 2864 graph->adjncy = adjncy; 2865 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2866 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2867 2868 if (pcbddc->dbg_flag) { 2869 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2870 for (i=0;i<graph->ncc;i++) { 2871 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2872 } 2873 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2874 } 2875 2876 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2877 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2878 for (i=0;i<pcis->n_B;i++) { 2879 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2880 } 2881 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2882 2883 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2884 sum = 0; 2885 for (i=0;i<graph->ncc;i++) { 2886 PetscInt sizecc = 0; 2887 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2888 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2889 sizecc++; 2890 } 2891 } 2892 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2893 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2894 cxadj[graph->queue[j]] = sizecc; 2895 } 2896 } 2897 sum += sizecc*sizecc; 2898 } 2899 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2900 sum = 0; 2901 for (i=0;i<nvtxs;i++) { 2902 PetscInt temp = cxadj[i]; 2903 cxadj[i] = sum; 2904 sum += temp; 2905 } 2906 cxadj[nvtxs] = sum; 2907 for (i=0;i<graph->ncc;i++) { 2908 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2909 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2910 PetscInt k,sizecc = 0; 2911 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2912 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2913 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2914 sizecc++; 2915 } 2916 } 2917 } 2918 } 2919 } 2920 if (nvtxs) { 2921 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2922 } else { 2923 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2924 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2925 } 2926 graph->xadj = 0; 2927 graph->adjncy = 0; 2928 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2929 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2930 } 2931 } 2932 2933 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2934 vertex_size = 1; 2935 if (pcbddc->user_provided_isfordofs) { 2936 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2937 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2938 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2939 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2940 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2941 } 2942 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2943 pcbddc->n_ISForDofs = 0; 2944 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2945 } 2946 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2947 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2948 } else { 2949 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2950 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2951 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2952 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2953 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2954 } 2955 } 2956 } 2957 2958 /* Setup of Graph */ 2959 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2960 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2961 } 2962 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2963 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2964 } 2965 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2966 2967 /* Graph's connected components analysis */ 2968 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2969 2970 /* print some info to stdout */ 2971 if (pcbddc->dbg_flag) { 2972 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2973 } 2974 2975 /* mark topography has done */ 2976 pcbddc->recompute_topography = PETSC_FALSE; 2977 PetscFunctionReturn(0); 2978 } 2979 2980 #undef __FUNCT__ 2981 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2982 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2983 { 2984 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2985 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2986 PetscErrorCode ierr; 2987 2988 PetscFunctionBegin; 2989 n = 0; 2990 vertices = 0; 2991 if (pcbddc->ConstraintMatrix) { 2992 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2993 for (i=0;i<local_primal_size;i++) { 2994 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2995 if (size_of_constraint == 1) n++; 2996 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2997 } 2998 if (vertices_idx) { 2999 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 3000 n = 0; 3001 for (i=0;i<local_primal_size;i++) { 3002 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3003 if (size_of_constraint == 1) { 3004 vertices[n++]=row_cmat_indices[0]; 3005 } 3006 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3007 } 3008 } 3009 } 3010 *n_vertices = n; 3011 if (vertices_idx) *vertices_idx = vertices; 3012 PetscFunctionReturn(0); 3013 } 3014 3015 #undef __FUNCT__ 3016 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 3017 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 3018 { 3019 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3020 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 3021 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 3022 PetscBT touched; 3023 PetscErrorCode ierr; 3024 3025 /* This function assumes that the number of local constraints per connected component 3026 is not greater than the number of nodes defined for the connected component 3027 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3028 PetscFunctionBegin; 3029 n = 0; 3030 constraints_index = 0; 3031 if (pcbddc->ConstraintMatrix) { 3032 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3033 max_size_of_constraint = 0; 3034 for (i=0;i<local_primal_size;i++) { 3035 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3036 if (size_of_constraint > 1) { 3037 n++; 3038 } 3039 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3040 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3041 } 3042 if (constraints_idx) { 3043 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3044 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3045 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3046 n = 0; 3047 for (i=0;i<local_primal_size;i++) { 3048 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3049 if (size_of_constraint > 1) { 3050 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3051 /* find first untouched local node */ 3052 j = 0; 3053 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3054 min_index = row_cmat_global_indices[j]; 3055 min_loc = j; 3056 /* search the minimum among nodes not yet touched on the connected component 3057 since there can be more than one constraint on a single cc */ 3058 for (j=1;j<size_of_constraint;j++) { 3059 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3060 min_index = row_cmat_global_indices[j]; 3061 min_loc = j; 3062 } 3063 } 3064 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3065 constraints_index[n++] = row_cmat_indices[min_loc]; 3066 } 3067 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3068 } 3069 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3070 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3071 } 3072 } 3073 *n_constraints = n; 3074 if (constraints_idx) *constraints_idx = constraints_index; 3075 PetscFunctionReturn(0); 3076 } 3077 3078 #undef __FUNCT__ 3079 #define __FUNCT__ "PCBDDCSubsetNumbering" 3080 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3081 { 3082 Vec local_vec,global_vec; 3083 IS seqis,paris; 3084 VecScatter scatter_ctx; 3085 PetscScalar *array; 3086 PetscInt *temp_global_dofs; 3087 PetscScalar globalsum; 3088 PetscInt i,j,s; 3089 PetscInt nlocals,first_index,old_index,max_local; 3090 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 3091 PetscMPIInt *dof_sizes,*dof_displs; 3092 PetscBool first_found; 3093 PetscErrorCode ierr; 3094 3095 PetscFunctionBegin; 3096 /* mpi buffers */ 3097 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3098 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3099 j = ( !rank_prec_comm ? size_prec_comm : 0); 3100 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 3101 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 3102 /* get maximum size of subset */ 3103 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3104 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3105 max_local = 0; 3106 for (i=0;i<n_local_dofs;i++) { 3107 if (max_local < temp_global_dofs[i] ) { 3108 max_local = temp_global_dofs[i]; 3109 } 3110 } 3111 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3112 max_global++; 3113 max_local = 0; 3114 for (i=0;i<n_local_dofs;i++) { 3115 if (max_local < local_dofs[i] ) { 3116 max_local = local_dofs[i]; 3117 } 3118 } 3119 max_local++; 3120 /* allocate workspace */ 3121 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3122 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3123 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3124 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3125 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3126 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3127 /* create scatter */ 3128 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3129 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3130 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3131 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3132 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3133 /* init array */ 3134 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3135 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3136 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3137 if (local_dofs_mult) { 3138 for (i=0;i<n_local_dofs;i++) { 3139 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3140 } 3141 } else { 3142 for (i=0;i<n_local_dofs;i++) { 3143 array[local_dofs[i]]=1.0; 3144 } 3145 } 3146 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3147 /* scatter into global vec and get total number of global dofs */ 3148 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3149 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3150 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3151 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3152 /* Fill global_vec with cumulative function for global numbering */ 3153 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3154 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3155 nlocals = 0; 3156 first_index = -1; 3157 first_found = PETSC_FALSE; 3158 for (i=0;i<s;i++) { 3159 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3160 first_found = PETSC_TRUE; 3161 first_index = i; 3162 } 3163 nlocals += (PetscInt)PetscRealPart(array[i]); 3164 } 3165 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3166 if (!rank_prec_comm) { 3167 dof_displs[0]=0; 3168 for (i=1;i<size_prec_comm;i++) { 3169 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3170 } 3171 } 3172 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3173 if (first_found) { 3174 array[first_index] += (PetscScalar)nlocals; 3175 old_index = first_index; 3176 for (i=first_index+1;i<s;i++) { 3177 if (PetscRealPart(array[i]) > 0.1) { 3178 array[i] += array[old_index]; 3179 old_index = i; 3180 } 3181 } 3182 } 3183 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3184 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3185 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3186 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3187 /* get global ordering of local dofs */ 3188 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3189 if (local_dofs_mult) { 3190 for (i=0;i<n_local_dofs;i++) { 3191 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3192 } 3193 } else { 3194 for (i=0;i<n_local_dofs;i++) { 3195 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3196 } 3197 } 3198 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3199 /* free workspace */ 3200 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3201 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3202 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3203 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3204 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3205 /* return pointer to global ordering of local dofs */ 3206 *global_numbering_subset = temp_global_dofs; 3207 PetscFunctionReturn(0); 3208 } 3209 3210 #undef __FUNCT__ 3211 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3212 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3213 { 3214 PetscInt i,j; 3215 PetscScalar *alphas; 3216 PetscErrorCode ierr; 3217 3218 PetscFunctionBegin; 3219 /* this implements stabilized Gram-Schmidt */ 3220 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3221 for (i=0;i<n;i++) { 3222 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3223 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3224 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3225 } 3226 ierr = PetscFree(alphas);CHKERRQ(ierr); 3227 PetscFunctionReturn(0); 3228 } 3229 3230 #undef __FUNCT__ 3231 #define __FUNCT__ "MatISGetSubassemblingPattern" 3232 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3233 { 3234 Mat subdomain_adj; 3235 IS new_ranks,ranks_send_to; 3236 MatPartitioning partitioner; 3237 Mat_IS *matis; 3238 PetscInt n_neighs,*neighs,*n_shared,**shared; 3239 PetscInt prank; 3240 PetscMPIInt size,rank,color; 3241 PetscInt *xadj,*adjncy,*oldranks; 3242 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3243 PetscInt i,local_size,threshold=0; 3244 PetscErrorCode ierr; 3245 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3246 PetscSubcomm subcomm; 3247 3248 PetscFunctionBegin; 3249 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3250 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3251 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3252 3253 /* Get info on mapping */ 3254 matis = (Mat_IS*)(mat->data); 3255 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3256 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3257 3258 /* build local CSR graph of subdomains' connectivity */ 3259 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3260 xadj[0] = 0; 3261 xadj[1] = PetscMax(n_neighs-1,0); 3262 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3263 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3264 3265 if (threshold) { 3266 PetscInt xadj_count = 0; 3267 for (i=1;i<n_neighs;i++) { 3268 if (n_shared[i] > threshold) { 3269 adjncy[xadj_count] = neighs[i]; 3270 adjncy_wgt[xadj_count] = n_shared[i]; 3271 xadj_count++; 3272 } 3273 } 3274 xadj[1] = xadj_count; 3275 } else { 3276 if (xadj[1]) { 3277 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3278 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3279 } 3280 } 3281 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3282 if (use_square) { 3283 for (i=0;i<xadj[1];i++) { 3284 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3285 } 3286 } 3287 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3288 3289 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3290 3291 /* 3292 Restrict work on active processes only. 3293 */ 3294 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3295 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3296 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3297 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3298 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3299 if (color) { 3300 ierr = PetscFree(xadj);CHKERRQ(ierr); 3301 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3302 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3303 } else { 3304 PetscInt coarsening_ratio; 3305 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3306 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3307 prank = rank; 3308 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3309 /* 3310 for (i=0;i<size;i++) { 3311 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3312 } 3313 */ 3314 for (i=0;i<xadj[1];i++) { 3315 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3316 } 3317 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3318 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3319 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3320 3321 /* Partition */ 3322 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3323 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3324 if (use_vwgt) { 3325 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3326 v_wgt[0] = local_size; 3327 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3328 } 3329 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3330 coarsening_ratio = size/n_subdomains; 3331 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3332 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3333 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3334 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3335 3336 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3337 if (contiguous) { 3338 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3339 } else { 3340 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3341 } 3342 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3343 /* clean up */ 3344 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3345 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3346 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3347 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3348 } 3349 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3350 3351 /* assemble parallel IS for sends */ 3352 i = 1; 3353 if (color) i=0; 3354 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3355 3356 /* get back IS */ 3357 *is_sends = ranks_send_to; 3358 PetscFunctionReturn(0); 3359 } 3360 3361 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3362 3363 #undef __FUNCT__ 3364 #define __FUNCT__ "MatISSubassemble" 3365 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3366 { 3367 Mat local_mat; 3368 Mat_IS *matis; 3369 IS is_sends_internal; 3370 PetscInt rows,cols,new_local_rows; 3371 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3372 PetscBool ismatis,isdense,newisdense,destroy_mat; 3373 ISLocalToGlobalMapping l2gmap; 3374 PetscInt* l2gmap_indices; 3375 const PetscInt* is_indices; 3376 MatType new_local_type; 3377 /* buffers */ 3378 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3379 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3380 PetscInt *recv_buffer_idxs_local; 3381 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3382 /* MPI */ 3383 MPI_Comm comm,comm_n; 3384 PetscSubcomm subcomm; 3385 PetscMPIInt n_sends,n_recvs,commsize; 3386 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3387 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3388 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3389 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3390 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3391 PetscErrorCode ierr; 3392 3393 PetscFunctionBegin; 3394 /* TODO: add missing checks */ 3395 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3396 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3397 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3398 PetscValidLogicalCollectiveInt(mat,nis,7); 3399 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3400 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3401 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3402 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3403 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3404 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3405 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3406 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3407 PetscInt mrows,mcols,mnrows,mncols; 3408 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3409 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3410 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3411 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3412 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3413 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3414 } 3415 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3416 PetscValidLogicalCollectiveInt(mat,bs,0); 3417 /* prepare IS for sending if not provided */ 3418 if (!is_sends) { 3419 PetscBool pcontig = PETSC_TRUE; 3420 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3421 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3422 } else { 3423 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3424 is_sends_internal = is_sends; 3425 } 3426 3427 /* get pointer of MATIS data */ 3428 matis = (Mat_IS*)mat->data; 3429 3430 /* get comm */ 3431 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3432 3433 /* compute number of sends */ 3434 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3435 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3436 3437 /* compute number of receives */ 3438 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3439 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3440 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3441 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3442 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3443 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3444 ierr = PetscFree(iflags);CHKERRQ(ierr); 3445 3446 /* restrict comm if requested */ 3447 subcomm = 0; 3448 destroy_mat = PETSC_FALSE; 3449 if (restrict_comm) { 3450 PetscMPIInt color,subcommsize; 3451 3452 color = 0; 3453 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3454 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3455 subcommsize = commsize - subcommsize; 3456 /* check if reuse has been requested */ 3457 if (reuse == MAT_REUSE_MATRIX) { 3458 if (*mat_n) { 3459 PetscMPIInt subcommsize2; 3460 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3461 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3462 comm_n = PetscObjectComm((PetscObject)*mat_n); 3463 } else { 3464 comm_n = PETSC_COMM_SELF; 3465 } 3466 } else { /* MAT_INITIAL_MATRIX */ 3467 PetscMPIInt rank; 3468 3469 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3470 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3471 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3472 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3473 comm_n = PetscSubcommChild(subcomm); 3474 } 3475 /* flag to destroy *mat_n if not significative */ 3476 if (color) destroy_mat = PETSC_TRUE; 3477 } else { 3478 comm_n = comm; 3479 } 3480 3481 /* prepare send/receive buffers */ 3482 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3483 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3484 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3485 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3486 if (nis) { 3487 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3488 } 3489 3490 /* Get data from local matrices */ 3491 if (!isdense) { 3492 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3493 /* TODO: See below some guidelines on how to prepare the local buffers */ 3494 /* 3495 send_buffer_vals should contain the raw values of the local matrix 3496 send_buffer_idxs should contain: 3497 - MatType_PRIVATE type 3498 - PetscInt size_of_l2gmap 3499 - PetscInt global_row_indices[size_of_l2gmap] 3500 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3501 */ 3502 } else { 3503 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3504 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3505 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3506 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3507 send_buffer_idxs[1] = i; 3508 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3509 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3510 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3511 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3512 for (i=0;i<n_sends;i++) { 3513 ilengths_vals[is_indices[i]] = len*len; 3514 ilengths_idxs[is_indices[i]] = len+2; 3515 } 3516 } 3517 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3518 /* additional is (if any) */ 3519 if (nis) { 3520 PetscMPIInt psum; 3521 PetscInt j; 3522 for (j=0,psum=0;j<nis;j++) { 3523 PetscInt plen; 3524 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3525 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3526 psum += len+1; /* indices + lenght */ 3527 } 3528 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3529 for (j=0,psum=0;j<nis;j++) { 3530 PetscInt plen; 3531 const PetscInt *is_array_idxs; 3532 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3533 send_buffer_idxs_is[psum] = plen; 3534 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3535 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3536 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3537 psum += plen+1; /* indices + lenght */ 3538 } 3539 for (i=0;i<n_sends;i++) { 3540 ilengths_idxs_is[is_indices[i]] = psum; 3541 } 3542 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3543 } 3544 3545 buf_size_idxs = 0; 3546 buf_size_vals = 0; 3547 buf_size_idxs_is = 0; 3548 for (i=0;i<n_recvs;i++) { 3549 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3550 buf_size_vals += (PetscInt)olengths_vals[i]; 3551 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3552 } 3553 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3554 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3555 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3556 3557 /* get new tags for clean communications */ 3558 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3559 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3560 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3561 3562 /* allocate for requests */ 3563 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3564 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3565 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3566 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3567 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3568 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3569 3570 /* communications */ 3571 ptr_idxs = recv_buffer_idxs; 3572 ptr_vals = recv_buffer_vals; 3573 ptr_idxs_is = recv_buffer_idxs_is; 3574 for (i=0;i<n_recvs;i++) { 3575 source_dest = onodes[i]; 3576 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3577 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3578 ptr_idxs += olengths_idxs[i]; 3579 ptr_vals += olengths_vals[i]; 3580 if (nis) { 3581 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3582 ptr_idxs_is += olengths_idxs_is[i]; 3583 } 3584 } 3585 for (i=0;i<n_sends;i++) { 3586 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3587 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3588 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3589 if (nis) { 3590 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3591 } 3592 } 3593 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3594 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3595 3596 /* assemble new l2g map */ 3597 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3598 ptr_idxs = recv_buffer_idxs; 3599 new_local_rows = 0; 3600 for (i=0;i<n_recvs;i++) { 3601 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3602 ptr_idxs += olengths_idxs[i]; 3603 } 3604 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3605 ptr_idxs = recv_buffer_idxs; 3606 new_local_rows = 0; 3607 for (i=0;i<n_recvs;i++) { 3608 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3609 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3610 ptr_idxs += olengths_idxs[i]; 3611 } 3612 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3613 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3614 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3615 3616 /* infer new local matrix type from received local matrices type */ 3617 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3618 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3619 if (n_recvs) { 3620 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3621 ptr_idxs = recv_buffer_idxs; 3622 for (i=0;i<n_recvs;i++) { 3623 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3624 new_local_type_private = MATAIJ_PRIVATE; 3625 break; 3626 } 3627 ptr_idxs += olengths_idxs[i]; 3628 } 3629 switch (new_local_type_private) { 3630 case MATDENSE_PRIVATE: 3631 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3632 new_local_type = MATSEQAIJ; 3633 bs = 1; 3634 } else { /* if I receive only 1 dense matrix */ 3635 new_local_type = MATSEQDENSE; 3636 bs = 1; 3637 } 3638 break; 3639 case MATAIJ_PRIVATE: 3640 new_local_type = MATSEQAIJ; 3641 bs = 1; 3642 break; 3643 case MATBAIJ_PRIVATE: 3644 new_local_type = MATSEQBAIJ; 3645 break; 3646 case MATSBAIJ_PRIVATE: 3647 new_local_type = MATSEQSBAIJ; 3648 break; 3649 default: 3650 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3651 break; 3652 } 3653 } else { /* by default, new_local_type is seqdense */ 3654 new_local_type = MATSEQDENSE; 3655 bs = 1; 3656 } 3657 3658 /* create MATIS object if needed */ 3659 if (reuse == MAT_INITIAL_MATRIX) { 3660 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3661 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3662 } else { 3663 /* it also destroys the local matrices */ 3664 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3665 } 3666 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3667 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3668 3669 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3670 3671 /* Global to local map of received indices */ 3672 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3673 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3674 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3675 3676 /* restore attributes -> type of incoming data and its size */ 3677 buf_size_idxs = 0; 3678 for (i=0;i<n_recvs;i++) { 3679 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3680 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3681 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3682 } 3683 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3684 3685 /* set preallocation */ 3686 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3687 if (!newisdense) { 3688 PetscInt *new_local_nnz=0; 3689 3690 ptr_vals = recv_buffer_vals; 3691 ptr_idxs = recv_buffer_idxs_local; 3692 if (n_recvs) { 3693 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3694 } 3695 for (i=0;i<n_recvs;i++) { 3696 PetscInt j; 3697 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3698 for (j=0;j<*(ptr_idxs+1);j++) { 3699 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3700 } 3701 } else { 3702 /* TODO */ 3703 } 3704 ptr_idxs += olengths_idxs[i]; 3705 } 3706 if (new_local_nnz) { 3707 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3708 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3709 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3710 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3711 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3712 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3713 } else { 3714 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3715 } 3716 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3717 } else { 3718 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3719 } 3720 3721 /* set values */ 3722 ptr_vals = recv_buffer_vals; 3723 ptr_idxs = recv_buffer_idxs_local; 3724 for (i=0;i<n_recvs;i++) { 3725 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3726 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3727 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3728 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3729 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3730 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3731 } else { 3732 /* TODO */ 3733 } 3734 ptr_idxs += olengths_idxs[i]; 3735 ptr_vals += olengths_vals[i]; 3736 } 3737 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3738 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3739 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3740 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3741 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3742 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3743 3744 #if 0 3745 if (!restrict_comm) { /* check */ 3746 Vec lvec,rvec; 3747 PetscReal infty_error; 3748 3749 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3750 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3751 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3752 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3753 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3754 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3755 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3756 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3757 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3758 } 3759 #endif 3760 3761 /* assemble new additional is (if any) */ 3762 if (nis) { 3763 PetscInt **temp_idxs,*count_is,j,psum; 3764 3765 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3766 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3767 ptr_idxs = recv_buffer_idxs_is; 3768 psum = 0; 3769 for (i=0;i<n_recvs;i++) { 3770 for (j=0;j<nis;j++) { 3771 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3772 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3773 psum += plen; 3774 ptr_idxs += plen+1; /* shift pointer to received data */ 3775 } 3776 } 3777 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3778 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3779 for (i=1;i<nis;i++) { 3780 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3781 } 3782 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3783 ptr_idxs = recv_buffer_idxs_is; 3784 for (i=0;i<n_recvs;i++) { 3785 for (j=0;j<nis;j++) { 3786 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3787 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3788 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3789 ptr_idxs += plen+1; /* shift pointer to received data */ 3790 } 3791 } 3792 for (i=0;i<nis;i++) { 3793 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3794 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3795 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3796 } 3797 ierr = PetscFree(count_is);CHKERRQ(ierr); 3798 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3799 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3800 } 3801 /* free workspace */ 3802 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3803 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3804 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3805 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3806 if (isdense) { 3807 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3808 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3809 } else { 3810 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3811 } 3812 if (nis) { 3813 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3814 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3815 } 3816 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3817 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3818 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3819 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3820 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3821 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3822 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3823 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3824 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3825 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3826 ierr = PetscFree(onodes);CHKERRQ(ierr); 3827 if (nis) { 3828 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3829 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3830 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3831 } 3832 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3833 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3834 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3835 for (i=0;i<nis;i++) { 3836 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3837 } 3838 } 3839 PetscFunctionReturn(0); 3840 } 3841 3842 /* temporary hack into ksp private data structure */ 3843 #include <petsc-private/kspimpl.h> 3844 3845 #undef __FUNCT__ 3846 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3847 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3848 { 3849 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3850 PC_IS *pcis = (PC_IS*)pc->data; 3851 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3852 MatNullSpace CoarseNullSpace=NULL; 3853 ISLocalToGlobalMapping coarse_islg; 3854 IS coarse_is,*isarray; 3855 PetscInt i,im_active=-1,active_procs=-1; 3856 PetscInt nis,nisdofs,nisneu; 3857 PC pc_temp; 3858 PCType coarse_pc_type; 3859 KSPType coarse_ksp_type; 3860 PetscBool multilevel_requested,multilevel_allowed; 3861 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3862 Mat t_coarse_mat_is; 3863 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3864 PetscMPIInt all_procs; 3865 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3866 PetscBool compute_vecs = PETSC_FALSE; 3867 PetscScalar *array; 3868 PetscErrorCode ierr; 3869 3870 PetscFunctionBegin; 3871 /* Assign global numbering to coarse dofs */ 3872 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3873 PetscInt ocoarse_size; 3874 compute_vecs = PETSC_TRUE; 3875 ocoarse_size = pcbddc->coarse_size; 3876 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3877 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3878 /* see if we can avoid some work */ 3879 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3880 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3881 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3882 coarse_reuse = PETSC_FALSE; 3883 } else { /* we can safely reuse already computed coarse matrix */ 3884 coarse_reuse = PETSC_TRUE; 3885 } 3886 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3887 coarse_reuse = PETSC_FALSE; 3888 } 3889 /* reset any subassembling information */ 3890 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3891 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3892 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3893 coarse_reuse = PETSC_TRUE; 3894 } 3895 3896 /* count "active" (i.e. with positive local size) and "void" processes */ 3897 im_active = !!(pcis->n); 3898 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3899 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3900 void_procs = all_procs-active_procs; 3901 csin_type_simple = PETSC_TRUE; 3902 redist = PETSC_FALSE; 3903 if (pcbddc->current_level && void_procs) { 3904 csin_ml = PETSC_TRUE; 3905 ncoarse_ml = void_procs; 3906 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3907 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3908 csin_ds = PETSC_TRUE; 3909 ncoarse_ds = pcbddc->redistribute_coarse; 3910 redist = PETSC_TRUE; 3911 } else { 3912 csin_ds = PETSC_TRUE; 3913 ncoarse_ds = active_procs; 3914 redist = PETSC_TRUE; 3915 } 3916 } else { 3917 csin_ml = PETSC_FALSE; 3918 ncoarse_ml = all_procs; 3919 if (void_procs) { 3920 csin_ds = PETSC_TRUE; 3921 ncoarse_ds = void_procs; 3922 csin_type_simple = PETSC_FALSE; 3923 } else { 3924 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3925 csin_ds = PETSC_TRUE; 3926 ncoarse_ds = pcbddc->redistribute_coarse; 3927 redist = PETSC_TRUE; 3928 } else { 3929 csin_ds = PETSC_FALSE; 3930 ncoarse_ds = all_procs; 3931 } 3932 } 3933 } 3934 3935 /* 3936 test if we can go multilevel: three conditions must be satisfied: 3937 - we have not exceeded the number of levels requested 3938 - we can actually subassemble the active processes 3939 - we can find a suitable number of MPI processes where we can place the subassembled problem 3940 */ 3941 multilevel_allowed = PETSC_FALSE; 3942 multilevel_requested = PETSC_FALSE; 3943 if (pcbddc->current_level < pcbddc->max_levels) { 3944 multilevel_requested = PETSC_TRUE; 3945 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3946 multilevel_allowed = PETSC_FALSE; 3947 } else { 3948 multilevel_allowed = PETSC_TRUE; 3949 } 3950 } 3951 /* determine number of process partecipating to coarse solver */ 3952 if (multilevel_allowed) { 3953 ncoarse = ncoarse_ml; 3954 csin = csin_ml; 3955 redist = PETSC_FALSE; 3956 } else { 3957 ncoarse = ncoarse_ds; 3958 csin = csin_ds; 3959 } 3960 3961 /* creates temporary l2gmap and IS for coarse indexes */ 3962 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3963 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3964 3965 /* creates temporary MATIS object for coarse matrix */ 3966 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3967 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3968 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3969 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3970 #if 0 3971 { 3972 PetscViewer viewer; 3973 char filename[256]; 3974 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3975 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3976 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3977 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3978 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3979 } 3980 #endif 3981 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3982 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3983 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3984 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3985 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3986 3987 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3988 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3989 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3990 const PetscInt *idxs; 3991 ISLocalToGlobalMapping tmap; 3992 3993 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3994 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3995 /* allocate space for temporary storage */ 3996 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3997 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3998 /* allocate for IS array */ 3999 nisdofs = pcbddc->n_ISForDofsLocal; 4000 nisneu = !!pcbddc->NeumannBoundariesLocal; 4001 nis = nisdofs + nisneu; 4002 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4003 /* dofs splitting */ 4004 for (i=0;i<nisdofs;i++) { 4005 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4006 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4007 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4008 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4009 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4010 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4011 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4012 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4013 } 4014 /* neumann boundaries */ 4015 if (pcbddc->NeumannBoundariesLocal) { 4016 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4017 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4018 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4019 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4020 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4021 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4022 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4023 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4024 } 4025 /* free memory */ 4026 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4027 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4028 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4029 } else { 4030 nis = 0; 4031 nisdofs = 0; 4032 nisneu = 0; 4033 isarray = NULL; 4034 } 4035 /* destroy no longer needed map */ 4036 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4037 4038 /* restrict on coarse candidates (if needed) */ 4039 coarse_mat_is = NULL; 4040 if (csin) { 4041 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4042 if (redist) { 4043 PetscMPIInt rank; 4044 PetscInt spc,n_spc_p1,dest[1],destsize; 4045 4046 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4047 spc = active_procs/ncoarse; 4048 n_spc_p1 = active_procs%ncoarse; 4049 if (im_active) { 4050 destsize = 1; 4051 if (rank > n_spc_p1*(spc+1)-1) { 4052 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4053 } else { 4054 dest[0] = rank/(spc+1); 4055 } 4056 } else { 4057 destsize = 0; 4058 } 4059 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4060 } else if (csin_type_simple) { 4061 PetscMPIInt rank; 4062 PetscInt issize,isidx; 4063 4064 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4065 if (im_active) { 4066 issize = 1; 4067 isidx = (PetscInt)rank; 4068 } else { 4069 issize = 0; 4070 isidx = -1; 4071 } 4072 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4073 } else { /* get a suitable subassembling pattern from MATIS code */ 4074 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4075 } 4076 4077 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4078 if (!redist || ncoarse <= void_procs) { 4079 PetscInt ncoarse_cand,tissize,*nisindices; 4080 PetscInt *coarse_candidates; 4081 const PetscInt* tisindices; 4082 4083 /* get coarse candidates' ranks in pc communicator */ 4084 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4085 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4086 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4087 if (!coarse_candidates[i]) { 4088 coarse_candidates[ncoarse_cand++]=i; 4089 } 4090 } 4091 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4092 4093 4094 if (pcbddc->dbg_flag) { 4095 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4096 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4097 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4098 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4099 for (i=0;i<ncoarse_cand;i++) { 4100 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4101 } 4102 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4103 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4104 } 4105 /* shift the pattern on coarse candidates */ 4106 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4107 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4108 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4109 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4110 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4111 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4112 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4113 } 4114 if (pcbddc->dbg_flag) { 4115 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4116 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4117 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4118 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4119 } 4120 } 4121 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4122 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4123 } else { 4124 if (pcbddc->dbg_flag) { 4125 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4126 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4127 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4128 } 4129 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4130 coarse_mat_is = t_coarse_mat_is; 4131 } 4132 4133 /* create local to global scatters for coarse problem */ 4134 if (compute_vecs) { 4135 PetscInt lrows; 4136 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4137 if (coarse_mat_is) { 4138 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4139 } else { 4140 lrows = 0; 4141 } 4142 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4143 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4144 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4145 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4146 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4147 } 4148 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4149 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4150 4151 /* set defaults for coarse KSP and PC */ 4152 if (multilevel_allowed) { 4153 coarse_ksp_type = KSPRICHARDSON; 4154 coarse_pc_type = PCBDDC; 4155 } else { 4156 coarse_ksp_type = KSPPREONLY; 4157 coarse_pc_type = PCREDUNDANT; 4158 } 4159 4160 /* print some info if requested */ 4161 if (pcbddc->dbg_flag) { 4162 if (!multilevel_allowed) { 4163 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4164 if (multilevel_requested) { 4165 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4166 } else if (pcbddc->max_levels) { 4167 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4168 } 4169 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4170 } 4171 } 4172 4173 /* create the coarse KSP object only once with defaults */ 4174 if (coarse_mat_is) { 4175 MatReuse coarse_mat_reuse; 4176 PetscViewer dbg_viewer = NULL; 4177 if (pcbddc->dbg_flag) { 4178 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4179 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4180 } 4181 if (!pcbddc->coarse_ksp) { 4182 char prefix[256],str_level[16]; 4183 size_t len; 4184 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4185 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4186 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4187 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4188 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4189 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4190 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4191 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4192 /* prefix */ 4193 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4194 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4195 if (!pcbddc->current_level) { 4196 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4197 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4198 } else { 4199 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4200 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4201 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4202 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4203 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4204 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4205 } 4206 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4207 /* allow user customization */ 4208 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4209 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4210 } 4211 4212 /* get some info after set from options */ 4213 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4214 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4215 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4216 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4217 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4218 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4219 isbddc = PETSC_FALSE; 4220 } 4221 if (isredundant) { 4222 KSP inner_ksp; 4223 PC inner_pc; 4224 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4225 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4226 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4227 } 4228 4229 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4230 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4231 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4232 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4233 if (nisdofs) { 4234 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4235 for (i=0;i<nisdofs;i++) { 4236 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4237 } 4238 } 4239 if (nisneu) { 4240 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4241 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4242 } 4243 4244 /* assemble coarse matrix */ 4245 if (coarse_reuse) { 4246 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4247 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4248 coarse_mat_reuse = MAT_REUSE_MATRIX; 4249 } else { 4250 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4251 } 4252 if (isbddc || isnn) { 4253 if (pcbddc->coarsening_ratio > 1) { 4254 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4255 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4256 if (pcbddc->dbg_flag) { 4257 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4258 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4259 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4260 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4261 } 4262 } 4263 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4264 } else { 4265 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4266 coarse_mat = coarse_mat_is; 4267 } 4268 } else { 4269 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4270 } 4271 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4272 4273 /* propagate symmetry info to coarse matrix */ 4274 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4275 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4276 4277 /* set operators */ 4278 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4279 if (pcbddc->dbg_flag) { 4280 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4281 } 4282 } else { /* processes non partecipating to coarse solver (if any) */ 4283 coarse_mat = 0; 4284 } 4285 ierr = PetscFree(isarray);CHKERRQ(ierr); 4286 #if 0 4287 { 4288 PetscViewer viewer; 4289 char filename[256]; 4290 sprintf(filename,"coarse_mat.m"); 4291 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4292 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4293 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4294 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4295 } 4296 #endif 4297 4298 /* Compute coarse null space (special handling by BDDC only) */ 4299 if (pcbddc->NullSpace) { 4300 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4301 } 4302 4303 if (pcbddc->coarse_ksp) { 4304 Vec crhs,csol; 4305 PetscBool ispreonly; 4306 if (CoarseNullSpace) { 4307 if (isbddc) { 4308 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4309 } else { 4310 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4311 } 4312 } 4313 /* setup coarse ksp */ 4314 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4315 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4316 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4317 /* hack */ 4318 if (!csol) { 4319 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4320 } 4321 if (!crhs) { 4322 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4323 } 4324 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4325 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4326 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4327 KSP check_ksp; 4328 KSPType check_ksp_type; 4329 PC check_pc; 4330 Vec check_vec,coarse_vec; 4331 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4332 PetscInt its; 4333 PetscBool compute_eigs; 4334 PetscReal *eigs_r,*eigs_c; 4335 PetscInt neigs; 4336 const char *prefix; 4337 4338 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4339 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4340 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4341 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4342 if (ispreonly) { 4343 check_ksp_type = KSPPREONLY; 4344 compute_eigs = PETSC_FALSE; 4345 } else { 4346 check_ksp_type = KSPGMRES; 4347 compute_eigs = PETSC_TRUE; 4348 } 4349 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4350 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4351 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4352 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4353 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4354 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4355 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4356 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4357 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4358 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4359 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4360 /* create random vec */ 4361 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4362 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4363 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4364 if (CoarseNullSpace) { 4365 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4366 } 4367 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4368 /* solve coarse problem */ 4369 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4370 if (CoarseNullSpace) { 4371 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4372 } 4373 /* set eigenvalue estimation if preonly has not been requested */ 4374 if (compute_eigs) { 4375 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4376 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4377 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4378 lambda_max = eigs_r[neigs-1]; 4379 lambda_min = eigs_r[0]; 4380 if (pcbddc->use_coarse_estimates) { 4381 if (lambda_max>lambda_min) { 4382 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4383 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4384 } 4385 } 4386 } 4387 4388 /* check coarse problem residual error */ 4389 if (pcbddc->dbg_flag) { 4390 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4391 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4392 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4393 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4394 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4395 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4396 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4397 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4398 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4399 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4400 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4401 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4402 if (compute_eigs) { 4403 PetscReal lambda_max_s,lambda_min_s; 4404 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4405 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4406 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4407 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4408 for (i=0;i<neigs;i++) { 4409 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4410 } 4411 } 4412 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4413 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4414 } 4415 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4416 if (compute_eigs) { 4417 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4418 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4419 } 4420 } 4421 } 4422 /* print additional info */ 4423 if (pcbddc->dbg_flag) { 4424 /* waits until all processes reaches this point */ 4425 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4426 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4427 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4428 } 4429 4430 /* free memory */ 4431 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4432 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4433 PetscFunctionReturn(0); 4434 } 4435 4436 #undef __FUNCT__ 4437 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4438 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4439 { 4440 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4441 PC_IS* pcis = (PC_IS*)pc->data; 4442 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4443 PetscInt i,coarse_size; 4444 PetscInt *local_primal_indices; 4445 PetscErrorCode ierr; 4446 4447 PetscFunctionBegin; 4448 /* Compute global number of coarse dofs */ 4449 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4450 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4451 } 4452 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4453 4454 /* check numbering */ 4455 if (pcbddc->dbg_flag) { 4456 PetscScalar coarsesum,*array; 4457 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4458 4459 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4460 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4461 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4462 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4463 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4464 for (i=0;i<pcbddc->local_primal_size;i++) { 4465 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4466 } 4467 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4468 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4469 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4470 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4471 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4472 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4473 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4474 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4475 for (i=0;i<pcis->n;i++) { 4476 if (array[i] == 1.0) { 4477 set_error = PETSC_TRUE; 4478 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4479 } 4480 } 4481 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4482 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4483 for (i=0;i<pcis->n;i++) { 4484 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4485 } 4486 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4487 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4488 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4489 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4490 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4491 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4492 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4493 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4494 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4495 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4496 for (i=0;i<pcbddc->local_primal_size;i++) { 4497 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4498 } 4499 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4500 } 4501 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4502 if (set_error_reduced) { 4503 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4504 } 4505 } 4506 /* get back data */ 4507 *coarse_size_n = coarse_size; 4508 *local_primal_indices_n = local_primal_indices; 4509 PetscFunctionReturn(0); 4510 } 4511 4512 #undef __FUNCT__ 4513 #define __FUNCT__ "PCBDDCGlobalToLocal" 4514 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4515 { 4516 IS localis_t; 4517 PetscInt i,lsize,*idxs,n; 4518 PetscScalar *vals; 4519 PetscErrorCode ierr; 4520 4521 PetscFunctionBegin; 4522 /* get indices in local ordering exploiting local to global map */ 4523 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4524 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4525 for (i=0;i<lsize;i++) vals[i] = 1.0; 4526 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4527 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4528 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4529 if (idxs) { /* multilevel guard */ 4530 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4531 } 4532 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4533 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4534 ierr = PetscFree(vals);CHKERRQ(ierr); 4535 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4536 /* now compute set in local ordering */ 4537 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4538 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4539 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4540 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4541 for (i=0,lsize=0;i<n;i++) { 4542 if (PetscRealPart(vals[i]) > 0.5) { 4543 lsize++; 4544 } 4545 } 4546 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4547 for (i=0,lsize=0;i<n;i++) { 4548 if (PetscRealPart(vals[i]) > 0.5) { 4549 idxs[lsize++] = i; 4550 } 4551 } 4552 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4553 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4554 *localis = localis_t; 4555 PetscFunctionReturn(0); 4556 } 4557 4558 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4559 #undef __FUNCT__ 4560 #define __FUNCT__ "PCBDDCMatMult_Private" 4561 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4562 { 4563 PCBDDCChange_ctx change_ctx; 4564 PetscErrorCode ierr; 4565 4566 PetscFunctionBegin; 4567 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4568 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4569 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4570 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4571 PetscFunctionReturn(0); 4572 } 4573 4574 #undef __FUNCT__ 4575 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4576 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4577 { 4578 PCBDDCChange_ctx change_ctx; 4579 PetscErrorCode ierr; 4580 4581 PetscFunctionBegin; 4582 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4583 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4584 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4585 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4586 PetscFunctionReturn(0); 4587 } 4588 4589 #undef __FUNCT__ 4590 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4591 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4592 { 4593 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4594 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4595 PetscInt *used_xadj,*used_adjncy; 4596 PetscBool free_used_adj; 4597 PetscErrorCode ierr; 4598 4599 PetscFunctionBegin; 4600 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4601 free_used_adj = PETSC_FALSE; 4602 if (pcbddc->sub_schurs_layers == -1) { 4603 used_xadj = NULL; 4604 used_adjncy = NULL; 4605 } else { 4606 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4607 used_xadj = pcbddc->mat_graph->xadj; 4608 used_adjncy = pcbddc->mat_graph->adjncy; 4609 } else if (pcbddc->computed_rowadj) { 4610 used_xadj = pcbddc->mat_graph->xadj; 4611 used_adjncy = pcbddc->mat_graph->adjncy; 4612 } else { 4613 PetscBool flg_row=PETSC_FALSE; 4614 const PetscInt *xadj,*adjncy; 4615 PetscInt nvtxs; 4616 4617 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4618 if (flg_row) { 4619 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4620 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4621 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4622 free_used_adj = PETSC_TRUE; 4623 } else { 4624 pcbddc->sub_schurs_layers = -1; 4625 used_xadj = NULL; 4626 used_adjncy = NULL; 4627 } 4628 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4629 } 4630 } 4631 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4632 4633 /* free adjacency */ 4634 if (free_used_adj) { 4635 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4636 } 4637 PetscFunctionReturn(0); 4638 } 4639 4640 #undef __FUNCT__ 4641 #define __FUNCT__ "PCBDDCInitSubSchurs" 4642 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4643 { 4644 PC_IS *pcis=(PC_IS*)pc->data; 4645 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4646 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4647 PCBDDCGraph graph; 4648 Mat S_j; 4649 PetscErrorCode ierr; 4650 4651 PetscFunctionBegin; 4652 /* attach interface graph for determining subsets */ 4653 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4654 IS verticesIS; 4655 4656 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4657 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4658 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4659 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4660 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4661 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4662 /* 4663 if (pcbddc->dbg_flag) { 4664 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4665 } 4666 */ 4667 } else { 4668 graph = pcbddc->mat_graph; 4669 } 4670 4671 /* Create Schur complement matrix */ 4672 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4673 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4674 4675 /* sub_schurs init */ 4676 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr); 4677 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4678 /* free graph struct */ 4679 if (pcbddc->sub_schurs_rebuild) { 4680 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4681 } 4682 PetscFunctionReturn(0); 4683 } 4684