1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) { 32 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented"); 33 } 34 35 /* max size of subsets */ 36 mss = 0; 37 for (i=0;i<sub_schurs->n_subs;i++) { 38 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 39 PetscInt subset_size; 40 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 41 mss = PetscMax(mss,subset_size); 42 } 43 } 44 45 /* min/max and threshold */ 46 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 47 nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1; 48 nmax = PetscMax(nmin,nmax); 49 if (pcbddc->adaptive_threshold > 1.0) { 50 thresh = 1.0/pcbddc->adaptive_threshold; 51 } else { 52 thresh = 1.0; 53 } 54 55 /* allocate lapack workspace */ 56 cum = cum2 = 0; 57 maxneigs = 0; 58 for (i=0;i<sub_schurs->n_subs;i++) { 59 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 60 PetscInt n,subset_size; 61 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 62 n = PetscMin(subset_size,nmax); 63 cum += subset_size*n; 64 cum2 += n; 65 maxneigs = PetscMax(maxneigs,n); 66 } 67 } 68 69 if (mss) { 70 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 71 PetscBLASInt B_itype = 1; 72 PetscBLASInt B_N = mss; 73 PetscReal zero = 0.0; 74 PetscReal eps = 0.0; /* dlamch? */ 75 76 B_lwork = -1; 77 S = NULL; 78 St = NULL; 79 eigs = NULL; 80 eigv = NULL; 81 B_iwork = NULL; 82 B_ifail = NULL; 83 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 84 #if defined(PETSC_USE_COMPLEX) 85 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 86 #else 87 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 88 #endif 89 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 90 ierr = PetscFPTrapPop();CHKERRQ(ierr); 91 } else { 92 /* TODO */ 93 } 94 } else { 95 lwork = 0; 96 } 97 98 nv = 0; 99 if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */ 100 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 101 } 102 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 103 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 104 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 105 #if defined(PETSC_USE_COMPLEX) 106 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 107 #endif 108 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 109 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 110 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 111 nv+cum,&pcbddc->adaptive_constraints_idxs, 112 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 113 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 114 115 maxneigs = 0; 116 cum = cum2 = cumarray = 0; 117 if (sub_schurs->is_Ej_com) { 118 const PetscInt *idxs; 119 120 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 121 for (cum=0;cum<nv;cum++) { 122 pcbddc->adaptive_constraints_n[cum] = 1; 123 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 124 pcbddc->adaptive_constraints_ptrs[cum] = cum; 125 pcbddc->adaptive_constraints_data[cum] = 1.0; 126 } 127 cum2 = cum; 128 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 129 } 130 131 if (mss) { /* multilevel */ 132 if (pcbddc->use_deluxe_scaling) { 133 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 134 } else { 135 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 136 } 137 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 138 } 139 140 for (i=0;i<sub_schurs->n_subs;i++) { 141 PetscInt j,subset_size; 142 143 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 144 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 145 const PetscInt *idxs; 146 PetscScalar one = 1.0,scalar_zero = 0.0; 147 PetscReal zero=0.0; 148 PetscBLASInt B_N; 149 150 /* S should be copied since we need it for deluxe scaling */ 151 if (sub_schurs->is_hermitian) { 152 PetscInt j; 153 for (j=0;j<subset_size;j++) { 154 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 155 } 156 for (j=0;j<subset_size;j++) { 157 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 158 } 159 } else { 160 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 161 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 162 } 163 /* is always this the right matrix? */ 164 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 165 166 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 167 /* St = Starray+cumarray; */ 168 169 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 170 PetscBLASInt B_itype = 1; 171 PetscBLASInt B_IL = 1, B_IU; 172 PetscReal eps = -1.0; /* dlamch? */ 173 PetscInt nmin_s; 174 175 /* ask for eigenvalues lower than thresh */ 176 PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs); 177 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 178 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 179 #if defined(PETSC_USE_COMPLEX) 180 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 181 #else 182 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 183 #endif 184 ierr = PetscFPTrapPop();CHKERRQ(ierr); 185 if (B_ierr) { 186 if (B_ierr < 0 ) { 187 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 188 } else if (B_ierr <= B_N) { 189 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 190 } else { 191 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 192 } 193 } 194 195 if (B_neigs > nmax) { 196 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax); 197 B_neigs = nmax; 198 } 199 200 nmin_s = PetscMin(nmin,B_N); 201 if (B_neigs < nmin_s) { 202 PetscBLASInt B_neigs2; 203 204 B_IL = B_neigs + 1; 205 ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr); 206 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU); 207 if (sub_schurs->is_hermitian) { 208 PetscInt j; 209 for (j=0;j<subset_size;j++) { 210 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 211 } 212 for (j=0;j<subset_size;j++) { 213 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 214 } 215 } else { 216 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 217 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 218 } 219 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 220 #if defined(PETSC_USE_COMPLEX) 221 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 222 #else 223 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 224 #endif 225 ierr = PetscFPTrapPop();CHKERRQ(ierr); 226 B_neigs += B_neigs2; 227 } 228 if (B_ierr) { 229 if (B_ierr < 0 ) { 230 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 231 } else if (B_ierr <= B_N) { 232 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 233 } else { 234 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 235 } 236 } 237 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Got %d eigs\n",PetscGlobalRank,B_neigs); 238 for (j=0;j<B_neigs;j++) { 239 if (eigs[j] == 0.0) { 240 PetscPrintf(PETSC_COMM_SELF,"[%d] Inf\n",PetscGlobalRank); 241 } else { 242 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.6e\n",PetscGlobalRank,1.0/eigs[j]); 243 } 244 } 245 } else { 246 /* TODO */ 247 } 248 maxneigs = PetscMax(B_neigs,maxneigs); 249 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 250 251 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 252 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&scalar_zero,Seigv,&B_N)); 253 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 254 #if 0 255 PetscInt ii; 256 for (ii=0;ii<B_neigs;ii++) { 257 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Eigenvector %d/%d (%d)\n",PetscGlobalRank,ii,B_neigs,B_N); 258 for (j=0;j<B_N;j++) { 259 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.4e %1.4e\n",PetscGlobalRank,eigv[ii*B_N+j],Seigv[ii*B_N+j]); 260 } 261 } 262 #endif 263 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 264 for (j=0;j<B_neigs;j++) { 265 #if 0 266 { 267 PetscBLASInt Blas_N,Blas_one = 1.0; 268 PetscScalar norm; 269 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 270 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 271 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 272 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 273 } else { 274 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 275 } 276 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 277 } 278 #endif 279 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 280 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 281 cum2 += subset_size; 282 } 283 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 284 } 285 /* shift for next computation */ 286 cumarray += subset_size*subset_size; 287 } 288 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 289 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 290 291 if (mss) { 292 if (pcbddc->use_deluxe_scaling) { 293 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 294 } else { 295 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 296 } 297 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 298 } 299 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 300 #if defined(PETSC_USE_COMPLEX) 301 ierr = PetscFree(rwork);CHKERRQ(ierr); 302 #endif 303 if (pcbddc->dbg_flag) { 304 PetscInt maxneigs_r; 305 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 306 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 307 } 308 PetscFunctionReturn(0); 309 } 310 311 #undef __FUNCT__ 312 #define __FUNCT__ "PCBDDCSetUpSolvers" 313 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 314 { 315 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 316 PetscScalar *coarse_submat_vals; 317 PetscErrorCode ierr; 318 319 PetscFunctionBegin; 320 /* Setup local scatters R_to_B and (optionally) R_to_D */ 321 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 322 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 323 324 /* Setup local neumann solver ksp_R */ 325 /* PCBDDCSetUpLocalScatters should be called first! */ 326 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 327 328 /* Change global null space passed in by the user if change of basis has been requested */ 329 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 330 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 331 } 332 333 /* 334 Setup local correction and local part of coarse basis. 335 Gives back the dense local part of the coarse matrix in column major ordering 336 */ 337 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 338 339 /* Compute total number of coarse nodes and setup coarse solver */ 340 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 341 342 /* free */ 343 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 347 #undef __FUNCT__ 348 #define __FUNCT__ "PCBDDCResetCustomization" 349 PetscErrorCode PCBDDCResetCustomization(PC pc) 350 { 351 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 352 PetscErrorCode ierr; 353 354 PetscFunctionBegin; 355 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 356 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 357 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 358 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 359 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 360 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 361 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 362 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 363 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 364 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 365 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 366 PetscFunctionReturn(0); 367 } 368 369 #undef __FUNCT__ 370 #define __FUNCT__ "PCBDDCResetTopography" 371 PetscErrorCode PCBDDCResetTopography(PC pc) 372 { 373 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 374 PetscErrorCode ierr; 375 376 PetscFunctionBegin; 377 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 378 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 379 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 380 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 381 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 385 #undef __FUNCT__ 386 #define __FUNCT__ "PCBDDCResetSolvers" 387 PetscErrorCode PCBDDCResetSolvers(PC pc) 388 { 389 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 390 PetscScalar *array; 391 PetscErrorCode ierr; 392 393 PetscFunctionBegin; 394 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 395 if (pcbddc->coarse_phi_B) { 396 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 397 ierr = PetscFree(array);CHKERRQ(ierr); 398 } 399 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 400 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 401 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 402 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 403 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 404 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 405 if (pcbddc->local_auxmat2) { 406 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 407 ierr = PetscFree(array);CHKERRQ(ierr); 408 } 409 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 410 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 411 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 412 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 413 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 414 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 415 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 416 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 417 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 418 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 419 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 420 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 421 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 422 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 423 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 424 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 430 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 431 { 432 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 433 PC_IS *pcis = (PC_IS*)pc->data; 434 VecType impVecType; 435 PetscInt n_constraints,n_R,old_size; 436 PetscErrorCode ierr; 437 438 PetscFunctionBegin; 439 if (!pcbddc->ConstraintMatrix) { 440 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 441 } 442 /* get sizes */ 443 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 444 n_R = pcis->n-pcbddc->n_actual_vertices; 445 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 446 /* local work vectors (try to avoid unneeded work)*/ 447 /* R nodes */ 448 old_size = -1; 449 if (pcbddc->vec1_R) { 450 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 451 } 452 if (n_R != old_size) { 453 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 454 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 455 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 456 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 457 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 458 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 459 } 460 /* local primal dofs */ 461 old_size = -1; 462 if (pcbddc->vec1_P) { 463 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 464 } 465 if (pcbddc->local_primal_size != old_size) { 466 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 467 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 468 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 469 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 470 } 471 /* local explicit constraints */ 472 old_size = -1; 473 if (pcbddc->vec1_C) { 474 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 475 } 476 if (n_constraints && n_constraints != old_size) { 477 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 478 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 479 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 480 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 481 } 482 PetscFunctionReturn(0); 483 } 484 485 #undef __FUNCT__ 486 #define __FUNCT__ "PCBDDCSetUpCorrection" 487 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 488 { 489 PetscErrorCode ierr; 490 /* pointers to pcis and pcbddc */ 491 PC_IS* pcis = (PC_IS*)pc->data; 492 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 493 /* submatrices of local problem */ 494 Mat A_RV,A_VR,A_VV; 495 /* submatrices of local coarse problem */ 496 Mat S_VV,S_CV,S_VC,S_CC; 497 /* working matrices */ 498 Mat C_CR; 499 /* additional working stuff */ 500 PC pc_R; 501 Mat F; 502 PetscBool isLU,isCHOL,isILU; 503 504 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 505 PetscScalar *work; 506 PetscInt *idx_V_B; 507 PetscInt n,n_vertices,n_constraints; 508 PetscInt i,n_R,n_D,n_B; 509 PetscBool unsymmetric_check; 510 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 511 MatType impMatType; 512 /* some shortcuts to scalars */ 513 PetscScalar one=1.0,m_one=-1.0; 514 515 PetscFunctionBegin; 516 /* get number of vertices (corners plus constraints with change of basis) 517 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 518 n_vertices = pcbddc->n_actual_vertices; 519 n_constraints = pcbddc->local_primal_size-n_vertices; 520 /* Set Non-overlapping dimensions */ 521 n_B = pcis->n_B; n_D = pcis->n - n_B; 522 n_R = pcis->n-n_vertices; 523 524 /* Set types for local objects needed by BDDC precondtioner */ 525 impMatType = MATSEQDENSE; 526 527 /* vertices in boundary numbering */ 528 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 529 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 530 if (i != n_vertices) { 531 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 532 } 533 534 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 535 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 536 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 537 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 538 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 539 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 540 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 541 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 542 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 543 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 544 545 unsymmetric_check = PETSC_FALSE; 546 /* allocate workspace */ 547 n = 0; 548 if (n_constraints) { 549 n += n_R*n_constraints; 550 } 551 if (n_vertices) { 552 n = PetscMax(2*n_R*n_vertices,n); 553 } 554 if (!pcbddc->issym) { 555 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 556 unsymmetric_check = PETSC_TRUE; 557 } 558 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 559 560 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 561 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 562 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 563 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 564 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 565 if (isLU || isILU || isCHOL) { 566 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 567 } else { 568 F = NULL; 569 } 570 571 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 572 if (n_constraints) { 573 Mat M1,M2,M3; 574 IS is_aux; 575 /* see if we can save some allocations */ 576 if (pcbddc->local_auxmat2) { 577 PetscInt on_R,on_constraints; 578 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 579 if (on_R != n_R || on_constraints != n_constraints) { 580 PetscScalar *marray; 581 582 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 583 ierr = PetscFree(marray);CHKERRQ(ierr); 584 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 585 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 586 } 587 } 588 /* auxiliary matrices */ 589 if (!pcbddc->local_auxmat2) { 590 PetscScalar *marray; 591 592 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 593 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 594 marray += n_R*n_constraints; 595 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 596 } 597 598 /* Extract constraints on R nodes: C_{CR} */ 599 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 600 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 601 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 602 603 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 604 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 605 for (i=0;i<n_constraints;i++) { 606 const PetscScalar *row_cmat_values; 607 const PetscInt *row_cmat_indices; 608 PetscInt size_of_constraint,j; 609 610 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 611 for (j=0;j<size_of_constraint;j++) { 612 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 613 } 614 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 615 } 616 if (F) { 617 Mat B; 618 619 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 620 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 621 ierr = MatDestroy(&B);CHKERRQ(ierr); 622 } else { 623 PetscScalar *xarray; 624 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 625 for (i=0;i<n_constraints;i++) { 626 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 627 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 628 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 629 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 630 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 631 } 632 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 633 } 634 635 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 636 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 637 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 638 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 639 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 640 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 641 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 642 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 643 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 644 ierr = MatDestroy(&M2);CHKERRQ(ierr); 645 ierr = MatDestroy(&M3);CHKERRQ(ierr); 646 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 647 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 648 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 649 ierr = MatDestroy(&M1);CHKERRQ(ierr); 650 } 651 /* Get submatrices from subdomain matrix */ 652 if (n_vertices) { 653 Mat newmat; 654 IS is_aux; 655 PetscInt ibs,mbs; 656 PetscBool issbaij; 657 658 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 659 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 660 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 661 if (ibs != mbs) { /* need to convert to SEQAIJ */ 662 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 663 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 664 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 665 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 666 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 667 } else { 668 /* this is safe */ 669 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 670 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 671 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 672 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 673 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 674 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 675 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 676 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 677 } else { 678 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 679 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 680 } 681 } 682 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 683 } 684 685 /* Matrix of coarse basis functions (local) */ 686 if (pcbddc->coarse_phi_B) { 687 PetscInt on_B,on_primal,on_D=n_D; 688 if (pcbddc->coarse_phi_D) { 689 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 690 } 691 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 692 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 693 PetscScalar *marray; 694 695 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 696 ierr = PetscFree(marray);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 700 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 701 } 702 } 703 704 if (!pcbddc->coarse_phi_B) { 705 PetscScalar *marray; 706 707 n = n_B*pcbddc->local_primal_size; 708 if (pcbddc->switch_static || pcbddc->dbg_flag) { 709 n += n_D*pcbddc->local_primal_size; 710 } 711 if (!pcbddc->issym) { 712 n *= 2; 713 } 714 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 715 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 716 n = n_B*pcbddc->local_primal_size; 717 if (pcbddc->switch_static || pcbddc->dbg_flag) { 718 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 719 n += n_D*pcbddc->local_primal_size; 720 } 721 if (!pcbddc->issym) { 722 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 723 if (pcbddc->switch_static || pcbddc->dbg_flag) { 724 n = n_B*pcbddc->local_primal_size; 725 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 726 } 727 } else { 728 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 729 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 730 if (pcbddc->switch_static || pcbddc->dbg_flag) { 731 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 732 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 733 } 734 } 735 } 736 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 737 /* vertices */ 738 if (n_vertices) { 739 740 if (n_R) { 741 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 742 PetscBLASInt B_N,B_one = 1; 743 PetscScalar *x,*y; 744 745 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 746 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 747 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 748 if (F) { 749 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 750 } else { 751 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 752 for (i=0;i<n_vertices;i++) { 753 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 754 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 755 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 756 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 757 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 758 } 759 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 760 } 761 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 762 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 763 if (n_constraints) { 764 Mat B; 765 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 766 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 767 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 768 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 769 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 770 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 771 ierr = MatDestroy(&B);CHKERRQ(ierr); 772 } 773 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 774 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 775 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 776 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 777 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 778 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 779 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 780 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 781 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 782 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 783 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 784 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 785 } else { 786 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 787 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 788 } 789 /* coarse basis functions */ 790 for (i=0;i<n_vertices;i++) { 791 PetscScalar *y; 792 793 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 794 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 795 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 796 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 797 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 798 y[n_B*i+idx_V_B[i]] = 1.0; 799 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 800 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 801 802 if (pcbddc->switch_static || pcbddc->dbg_flag) { 803 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 804 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 805 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 806 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 807 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 808 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 809 } 810 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 811 } 812 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 813 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 814 } 815 816 if (n_constraints) { 817 Mat B; 818 819 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 820 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 821 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 822 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 823 if (n_vertices) { 824 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 825 } 826 ierr = MatDestroy(&B);CHKERRQ(ierr); 827 /* coarse basis functions */ 828 for (i=0;i<n_constraints;i++) { 829 PetscScalar *y; 830 831 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 832 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 833 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 834 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 835 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 836 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 837 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 838 839 if (pcbddc->switch_static || pcbddc->dbg_flag) { 840 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 841 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 842 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 844 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 845 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 846 } 847 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 848 } 849 } 850 851 /* compute other basis functions for non-symmetric problems */ 852 if (!pcbddc->issym) { 853 Mat B,X; 854 855 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 856 857 if (n_constraints) { 858 Mat S_CCT,B_C; 859 860 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 861 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 862 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 863 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 864 if (n_vertices) { 865 Mat B_V,S_VCT; 866 867 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 868 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 869 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 870 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 871 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 872 } 873 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 874 } 875 if (n_vertices && n_R) { 876 Mat A_VRT; 877 PetscBLASInt B_N,B_one = 1; 878 879 if (!n_constraints) { /* if there are no constraints, reset work */ 880 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 881 } 882 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 883 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 884 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 885 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 886 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 887 } 888 889 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 890 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 891 for (i=0;i<pcbddc->local_primal_size;i++) { 892 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 893 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 894 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 895 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 896 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 897 } 898 } else { 899 for (i=0;i<pcbddc->local_primal_size;i++) { 900 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 901 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 902 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 903 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 904 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 905 } 906 } 907 ierr = MatDestroy(&B);CHKERRQ(ierr); 908 /* coarse basis functions */ 909 for (i=0;i<pcbddc->local_primal_size;i++) { 910 PetscScalar *y; 911 912 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 913 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 914 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 915 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 917 if (i<n_vertices) { 918 y[n_B*i+idx_V_B[i]] = 1.0; 919 } 920 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 921 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 922 923 if (pcbddc->switch_static || pcbddc->dbg_flag) { 924 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 925 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 926 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 927 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 928 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 929 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 930 } 931 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 932 } 933 ierr = MatDestroy(&X);CHKERRQ(ierr); 934 } 935 936 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 937 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 938 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 939 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 940 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 941 /* Checking coarse_sub_mat and coarse basis functios */ 942 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 943 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 944 if (pcbddc->dbg_flag) { 945 Mat coarse_sub_mat; 946 Mat AUXMAT,TM1,TM2,TM3,TM4; 947 Mat coarse_phi_D,coarse_phi_B; 948 Mat coarse_psi_D,coarse_psi_B; 949 Mat A_II,A_BB,A_IB,A_BI; 950 MatType checkmattype=MATSEQAIJ; 951 PetscReal real_value; 952 953 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 954 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 955 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 956 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 957 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 958 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 959 if (unsymmetric_check) { 960 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 961 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 962 } 963 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 964 965 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 966 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 967 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 968 if (unsymmetric_check) { 969 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 970 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 971 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 972 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 973 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 974 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 975 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 976 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 977 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 978 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 979 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 980 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 981 } else { 982 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 983 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 984 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 985 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 986 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 987 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 988 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 989 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 990 } 991 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 992 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 993 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 994 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 995 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 996 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 997 998 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 999 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1000 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1001 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1002 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1003 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1004 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1005 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1006 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1007 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1008 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1009 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1010 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1011 if (unsymmetric_check) { 1012 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1013 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1014 } 1015 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1016 } 1017 1018 /* free memory */ 1019 ierr = PetscFree(work);CHKERRQ(ierr); 1020 if (n_vertices) { 1021 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1022 } 1023 if (n_constraints) { 1024 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1025 } 1026 /* get back data */ 1027 *coarse_submat_vals_n = coarse_submat_vals; 1028 PetscFunctionReturn(0); 1029 } 1030 1031 #undef __FUNCT__ 1032 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1033 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1034 { 1035 Mat *work_mat; 1036 IS isrow_s,iscol_s; 1037 PetscBool rsorted,csorted; 1038 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1039 PetscErrorCode ierr; 1040 1041 PetscFunctionBegin; 1042 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1043 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1044 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1045 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1046 1047 if (!rsorted) { 1048 const PetscInt *idxs; 1049 PetscInt *idxs_sorted,i; 1050 1051 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1052 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1053 for (i=0;i<rsize;i++) { 1054 idxs_perm_r[i] = i; 1055 } 1056 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1057 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1058 for (i=0;i<rsize;i++) { 1059 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1060 } 1061 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1062 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1063 } else { 1064 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1065 isrow_s = isrow; 1066 } 1067 1068 if (!csorted) { 1069 if (isrow == iscol) { 1070 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1071 iscol_s = isrow_s; 1072 } else { 1073 const PetscInt *idxs; 1074 PetscInt *idxs_sorted,i; 1075 1076 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1077 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1078 for (i=0;i<csize;i++) { 1079 idxs_perm_c[i] = i; 1080 } 1081 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1082 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1083 for (i=0;i<csize;i++) { 1084 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1085 } 1086 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1087 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1088 } 1089 } else { 1090 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1091 iscol_s = iscol; 1092 } 1093 1094 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1095 1096 if (!rsorted || !csorted) { 1097 Mat new_mat; 1098 IS is_perm_r,is_perm_c; 1099 1100 if (!rsorted) { 1101 PetscInt *idxs_r,i; 1102 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1103 for (i=0;i<rsize;i++) { 1104 idxs_r[idxs_perm_r[i]] = i; 1105 } 1106 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1107 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1108 } else { 1109 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1110 } 1111 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1112 1113 if (!csorted) { 1114 if (isrow_s == iscol_s) { 1115 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1116 is_perm_c = is_perm_r; 1117 } else { 1118 PetscInt *idxs_c,i; 1119 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1120 for (i=0;i<csize;i++) { 1121 idxs_c[idxs_perm_c[i]] = i; 1122 } 1123 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1124 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1125 } 1126 } else { 1127 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1128 } 1129 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1130 1131 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1132 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1133 work_mat[0] = new_mat; 1134 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1135 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1136 } 1137 1138 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1139 *B = work_mat[0]; 1140 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1141 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1142 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1148 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1149 { 1150 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1151 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1152 Mat new_mat; 1153 IS is_local,is_global; 1154 PetscInt local_size; 1155 PetscBool isseqaij; 1156 PetscErrorCode ierr; 1157 1158 PetscFunctionBegin; 1159 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1160 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1161 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1162 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1163 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1164 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1165 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1166 1167 /* check */ 1168 if (pcbddc->dbg_flag) { 1169 Vec x,x_change; 1170 PetscReal error; 1171 1172 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1173 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1174 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1175 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1176 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1177 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1178 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1179 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1180 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1181 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1182 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1183 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1184 ierr = VecDestroy(&x);CHKERRQ(ierr); 1185 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1186 } 1187 1188 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1189 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1190 if (isseqaij) { 1191 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1192 } else { 1193 Mat work_mat; 1194 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1195 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1196 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1197 } 1198 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1199 /* 1200 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1201 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1202 */ 1203 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1204 PetscFunctionReturn(0); 1205 } 1206 1207 #undef __FUNCT__ 1208 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1209 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1210 { 1211 PC_IS* pcis = (PC_IS*)(pc->data); 1212 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1213 IS is_aux1,is_aux2; 1214 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1215 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1216 PetscInt vbs,bs; 1217 PetscBT bitmask; 1218 PetscErrorCode ierr; 1219 1220 PetscFunctionBegin; 1221 /* 1222 No need to setup local scatters if 1223 - primal space is unchanged 1224 AND 1225 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1226 AND 1227 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1228 */ 1229 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1230 PetscFunctionReturn(0); 1231 } 1232 /* destroy old objects */ 1233 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1234 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1235 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1236 /* Set Non-overlapping dimensions */ 1237 n_B = pcis->n_B; n_D = pcis->n - n_B; 1238 n_vertices = pcbddc->n_actual_vertices; 1239 /* create auxiliary bitmask */ 1240 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1241 for (i=0;i<n_vertices;i++) { 1242 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1243 } 1244 1245 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1246 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1247 for (i=0, n_R=0; i<pcis->n; i++) { 1248 if (!PetscBTLookup(bitmask,i)) { 1249 idx_R_local[n_R] = i; 1250 n_R++; 1251 } 1252 } 1253 1254 /* Block code */ 1255 vbs = 1; 1256 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1257 if (bs>1 && !(n_vertices%bs)) { 1258 PetscBool is_blocked = PETSC_TRUE; 1259 PetscInt *vary; 1260 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1261 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1262 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1263 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1264 for (i=0; i<n_vertices; i++) { 1265 if (vary[i]!=0 && vary[i]!=bs) { 1266 is_blocked = PETSC_FALSE; 1267 break; 1268 } 1269 } 1270 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1271 vbs = bs; 1272 for (i=0;i<n_R/vbs;i++) { 1273 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1274 } 1275 } 1276 ierr = PetscFree(vary);CHKERRQ(ierr); 1277 } 1278 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1279 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1280 1281 /* print some info if requested */ 1282 if (pcbddc->dbg_flag) { 1283 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1284 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1285 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1287 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1288 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1289 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1290 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1291 } 1292 1293 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1294 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1295 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1296 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1297 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1298 for (i=0; i<n_D; i++) { 1299 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1300 } 1301 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1302 for (i=0, j=0; i<n_R; i++) { 1303 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1304 aux_array1[j++] = i; 1305 } 1306 } 1307 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1308 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1309 for (i=0, j=0; i<n_B; i++) { 1310 if (!PetscBTLookup(bitmask,is_indices[i])) { 1311 aux_array2[j++] = i; 1312 } 1313 } 1314 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1315 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1316 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1317 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1318 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1319 1320 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1321 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1322 for (i=0, j=0; i<n_R; i++) { 1323 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1324 aux_array1[j++] = i; 1325 } 1326 } 1327 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1328 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1329 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1330 } 1331 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1332 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1333 PetscFunctionReturn(0); 1334 } 1335 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1339 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1340 { 1341 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1342 PC_IS *pcis = (PC_IS*)pc->data; 1343 PC pc_temp; 1344 Mat A_RR; 1345 MatReuse reuse; 1346 PetscScalar m_one = -1.0; 1347 PetscReal value; 1348 PetscInt n_D,n_R,ibs,mbs; 1349 PetscBool use_exact,use_exact_reduced,issbaij; 1350 PetscErrorCode ierr; 1351 /* prefixes stuff */ 1352 char dir_prefix[256],neu_prefix[256],str_level[16]; 1353 size_t len; 1354 1355 PetscFunctionBegin; 1356 1357 /* compute prefixes */ 1358 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1359 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1360 if (!pcbddc->current_level) { 1361 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1362 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1363 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1364 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1365 } else { 1366 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1367 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1368 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1369 len -= 15; /* remove "pc_bddc_coarse_" */ 1370 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1371 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1372 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1373 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1374 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1375 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1376 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1377 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1378 } 1379 1380 /* DIRICHLET PROBLEM */ 1381 if (dirichlet) { 1382 /* Matrix for Dirichlet problem is pcis->A_II */ 1383 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1384 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1385 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1386 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1387 /* default */ 1388 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1389 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1390 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1391 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1392 if (issbaij) { 1393 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1394 } else { 1395 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1396 } 1397 /* Allow user's customization */ 1398 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1399 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1400 } 1401 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1402 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1403 if (!n_D) { 1404 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1405 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1406 } 1407 /* Set Up KSP for Dirichlet problem of BDDC */ 1408 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1409 /* set ksp_D into pcis data */ 1410 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1411 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1412 pcis->ksp_D = pcbddc->ksp_D; 1413 } 1414 1415 /* NEUMANN PROBLEM */ 1416 A_RR = 0; 1417 if (neumann) { 1418 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1419 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1420 if (pcbddc->ksp_R) { /* already created ksp */ 1421 PetscInt nn_R; 1422 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1423 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1424 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1425 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1426 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1427 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1428 reuse = MAT_INITIAL_MATRIX; 1429 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1430 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1431 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1432 reuse = MAT_INITIAL_MATRIX; 1433 } else { /* safe to reuse the matrix */ 1434 reuse = MAT_REUSE_MATRIX; 1435 } 1436 } 1437 /* last check */ 1438 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1439 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1440 reuse = MAT_INITIAL_MATRIX; 1441 } 1442 } else { /* first time, so we need to create the matrix */ 1443 reuse = MAT_INITIAL_MATRIX; 1444 } 1445 /* extract A_RR */ 1446 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1447 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1448 if (ibs != mbs) { 1449 Mat newmat; 1450 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1451 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1452 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1453 } else { 1454 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1455 } 1456 if (!pcbddc->ksp_R) { /* create object if not present */ 1457 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1458 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1459 /* default */ 1460 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1461 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1462 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1463 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1464 if (issbaij) { 1465 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1466 } else { 1467 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1468 } 1469 /* Allow user's customization */ 1470 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1471 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1472 } 1473 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1474 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1475 if (!n_R) { 1476 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1477 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1478 } 1479 /* Set Up KSP for Neumann problem of BDDC */ 1480 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1481 } 1482 1483 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1484 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1485 if (pcbddc->dbg_flag) { 1486 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1487 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1488 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1489 } 1490 if (dirichlet) { /* Dirichlet */ 1491 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1492 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1493 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1494 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1495 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1496 /* need to be adapted? */ 1497 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1498 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1499 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1500 /* print info */ 1501 if (pcbddc->dbg_flag) { 1502 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1503 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1504 } 1505 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1506 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1507 } 1508 } 1509 if (neumann) { /* Neumann */ 1510 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1511 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1512 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1513 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1514 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1515 /* need to be adapted? */ 1516 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1517 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1518 /* print info */ 1519 if (pcbddc->dbg_flag) { 1520 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1521 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1522 } 1523 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1524 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1525 } 1526 } 1527 } 1528 /* free Neumann problem's matrix */ 1529 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1535 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1536 { 1537 PetscErrorCode ierr; 1538 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1539 1540 PetscFunctionBegin; 1541 if (applytranspose) { 1542 if (pcbddc->local_auxmat1) { 1543 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1544 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1545 } 1546 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1547 } else { 1548 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1549 if (pcbddc->local_auxmat1) { 1550 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1551 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1552 } 1553 } 1554 PetscFunctionReturn(0); 1555 } 1556 1557 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1560 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1561 { 1562 PetscErrorCode ierr; 1563 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1564 PC_IS* pcis = (PC_IS*) (pc->data); 1565 const PetscScalar zero = 0.0; 1566 1567 PetscFunctionBegin; 1568 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1569 if (applytranspose) { 1570 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1571 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1572 } else { 1573 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1574 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1575 } 1576 /* start communications from local primal nodes to rhs of coarse solver */ 1577 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1578 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1579 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1580 1581 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1582 /* TODO remove null space when doing multilevel */ 1583 if (pcbddc->coarse_ksp) { 1584 if (applytranspose) { 1585 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1586 } else { 1587 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1588 } 1589 } 1590 1591 /* Local solution on R nodes */ 1592 if (pcis->n) { 1593 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1594 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1595 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1596 if (pcbddc->switch_static) { 1597 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1598 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1599 } 1600 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1601 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1602 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1603 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1604 if (pcbddc->switch_static) { 1605 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1606 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1607 } 1608 } 1609 1610 /* communications from coarse sol to local primal nodes */ 1611 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1612 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1613 1614 /* Sum contributions from two levels */ 1615 if (applytranspose) { 1616 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1617 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1618 } else { 1619 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1620 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1621 } 1622 PetscFunctionReturn(0); 1623 } 1624 1625 #undef __FUNCT__ 1626 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1627 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1628 { 1629 PetscErrorCode ierr; 1630 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1631 PetscScalar *array; 1632 Vec from,to; 1633 1634 PetscFunctionBegin; 1635 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1636 from = pcbddc->coarse_vec; 1637 to = pcbddc->vec1_P; 1638 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1639 Vec tvec; 1640 1641 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1642 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1643 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1644 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1645 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1646 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1647 } 1648 } else { /* from local to global -> put data in coarse right hand side */ 1649 from = pcbddc->vec1_P; 1650 to = pcbddc->coarse_vec; 1651 } 1652 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1653 PetscFunctionReturn(0); 1654 } 1655 1656 #undef __FUNCT__ 1657 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1658 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1659 { 1660 PetscErrorCode ierr; 1661 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1662 PetscScalar *array; 1663 Vec from,to; 1664 1665 PetscFunctionBegin; 1666 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1667 from = pcbddc->coarse_vec; 1668 to = pcbddc->vec1_P; 1669 } else { /* from local to global -> put data in coarse right hand side */ 1670 from = pcbddc->vec1_P; 1671 to = pcbddc->coarse_vec; 1672 } 1673 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1674 if (smode == SCATTER_FORWARD) { 1675 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1676 Vec tvec; 1677 1678 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1679 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1680 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1681 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1682 } 1683 } else { 1684 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1685 ierr = VecResetArray(from);CHKERRQ(ierr); 1686 } 1687 } 1688 PetscFunctionReturn(0); 1689 } 1690 1691 /* uncomment for testing purposes */ 1692 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1693 #undef __FUNCT__ 1694 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1695 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1696 { 1697 PetscErrorCode ierr; 1698 PC_IS* pcis = (PC_IS*)(pc->data); 1699 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1700 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1701 /* one and zero */ 1702 PetscScalar one=1.0,zero=0.0; 1703 /* space to store constraints and their local indices */ 1704 PetscScalar *temp_quadrature_constraint; 1705 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1706 /* iterators */ 1707 PetscInt i,j,k,total_counts,temp_start_ptr; 1708 /* BLAS integers */ 1709 PetscBLASInt lwork,lierr; 1710 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1711 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1712 /* reuse */ 1713 PetscInt olocal_primal_size; 1714 PetscInt *oprimal_indices_local_idxs; 1715 /* change of basis */ 1716 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1717 PetscBool boolforchange,qr_needed; 1718 PetscBT touched,change_basis,qr_needed_idx; 1719 /* auxiliary stuff */ 1720 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1721 PetscInt ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL; 1722 PetscScalar *temp_quadrature_constraint_work=NULL; 1723 /* some quantities */ 1724 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1725 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1726 1727 PetscFunctionBegin; 1728 /* Destroy Mat objects computed previously */ 1729 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1730 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1731 1732 /* print some info */ 1733 if (pcbddc->dbg_flag) { 1734 IS vertices; 1735 PetscInt nv,nedges,nfaces; 1736 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1737 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1738 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1739 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1740 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1741 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1742 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_edges);CHKERRQ(ierr); 1743 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nedges,pcbddc->use_faces);CHKERRQ(ierr); 1744 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1745 } 1746 1747 if (!pcbddc->adaptive_selection) { 1748 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1749 MatNullSpace nearnullsp; 1750 const Vec *nearnullvecs; 1751 Vec *localnearnullsp; 1752 PetscScalar *array; 1753 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1754 PetscBool nnsp_has_cnst; 1755 /* LAPACK working arrays for SVD or POD */ 1756 PetscBool skip_lapack; 1757 PetscScalar *work; 1758 PetscReal *singular_vals; 1759 #if defined(PETSC_USE_COMPLEX) 1760 PetscReal *rwork; 1761 #endif 1762 #if defined(PETSC_MISSING_LAPACK_GESVD) 1763 PetscScalar *temp_basis,*correlation_mat; 1764 #else 1765 PetscBLASInt dummy_int; 1766 PetscScalar dummy_scalar; 1767 #endif 1768 1769 /* Get index sets for faces, edges and vertices from graph */ 1770 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1771 /* free unneeded index sets */ 1772 if (!pcbddc->use_vertices) { 1773 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1774 } 1775 if (!pcbddc->use_edges) { 1776 for (i=0;i<n_ISForEdges;i++) { 1777 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1778 } 1779 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1780 n_ISForEdges = 0; 1781 } 1782 if (!pcbddc->use_faces) { 1783 for (i=0;i<n_ISForFaces;i++) { 1784 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1785 } 1786 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1787 n_ISForFaces = 0; 1788 } 1789 /* HACKS (the following two blocks of code) */ 1790 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1791 pcbddc->use_change_of_basis = PETSC_TRUE; 1792 if (!ISForEdges) { 1793 pcbddc->use_change_on_faces = PETSC_TRUE; 1794 } 1795 } 1796 if (pcbddc->NullSpace) { 1797 /* use_change_of_basis should be consistent among processors */ 1798 PetscBool tbool[2],gbool[2]; 1799 tbool [0] = pcbddc->use_change_of_basis; 1800 tbool [1] = pcbddc->use_change_on_faces; 1801 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1802 pcbddc->use_change_of_basis = gbool[0]; 1803 pcbddc->use_change_on_faces = gbool[1]; 1804 } 1805 1806 /* check if near null space is attached to global mat */ 1807 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1808 if (nearnullsp) { 1809 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1810 /* remove any stored info */ 1811 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1812 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1813 /* store information for BDDC solver reuse */ 1814 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1815 pcbddc->onearnullspace = nearnullsp; 1816 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1817 for (i=0;i<nnsp_size;i++) { 1818 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1819 } 1820 } else { /* if near null space is not provided BDDC uses constants by default */ 1821 nnsp_size = 0; 1822 nnsp_has_cnst = PETSC_TRUE; 1823 } 1824 /* get max number of constraints on a single cc */ 1825 max_constraints = nnsp_size; 1826 if (nnsp_has_cnst) max_constraints++; 1827 1828 /* 1829 Evaluate maximum storage size needed by the procedure 1830 - temp_indices will contain start index of each constraint stored as follows 1831 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1832 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1833 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1834 */ 1835 total_counts = n_ISForFaces+n_ISForEdges; 1836 total_counts *= max_constraints; 1837 n_vertices = 0; 1838 if (ISForVertices) { 1839 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1840 } 1841 total_counts += n_vertices; 1842 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1843 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1844 total_counts = 0; 1845 max_size_of_constraint = 0; 1846 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1847 if (i<n_ISForEdges) { 1848 used_IS = &ISForEdges[i]; 1849 } else { 1850 used_IS = &ISForFaces[i-n_ISForEdges]; 1851 } 1852 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1853 total_counts += j; 1854 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1855 } 1856 total_counts *= max_constraints; 1857 total_counts += n_vertices; 1858 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1859 /* get local part of global near null space vectors */ 1860 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1861 for (k=0;k<nnsp_size;k++) { 1862 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1863 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1864 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1865 } 1866 1867 /* whether or not to skip lapack calls */ 1868 skip_lapack = PETSC_TRUE; 1869 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1870 1871 /* allocate some auxiliary stuff */ 1872 if (!skip_lapack || pcbddc->use_qr_single) { 1873 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1874 } 1875 1876 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1877 if (!skip_lapack) { 1878 PetscScalar temp_work; 1879 1880 #if defined(PETSC_MISSING_LAPACK_GESVD) 1881 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1882 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1883 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1884 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1885 #if defined(PETSC_USE_COMPLEX) 1886 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1887 #endif 1888 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1889 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1890 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1891 lwork = -1; 1892 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1893 #if !defined(PETSC_USE_COMPLEX) 1894 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1895 #else 1896 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1897 #endif 1898 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1899 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1900 #else /* on missing GESVD */ 1901 /* SVD */ 1902 PetscInt max_n,min_n; 1903 max_n = max_size_of_constraint; 1904 min_n = max_constraints; 1905 if (max_size_of_constraint < max_constraints) { 1906 min_n = max_size_of_constraint; 1907 max_n = max_constraints; 1908 } 1909 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1910 #if defined(PETSC_USE_COMPLEX) 1911 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1912 #endif 1913 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1914 lwork = -1; 1915 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1916 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1917 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1918 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1919 #if !defined(PETSC_USE_COMPLEX) 1920 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 1921 #else 1922 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 1923 #endif 1924 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1925 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1926 #endif /* on missing GESVD */ 1927 /* Allocate optimal workspace */ 1928 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1929 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1930 } 1931 /* Now we can loop on constraining sets */ 1932 total_counts = 0; 1933 temp_indices[0] = 0; 1934 /* vertices */ 1935 if (ISForVertices) { 1936 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1937 if (nnsp_has_cnst) { /* consider all vertices */ 1938 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1939 for (i=0;i<n_vertices;i++) { 1940 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1941 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1942 total_counts++; 1943 } 1944 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1945 PetscBool used_vertex; 1946 for (i=0;i<n_vertices;i++) { 1947 used_vertex = PETSC_FALSE; 1948 k = 0; 1949 while (!used_vertex && k<nnsp_size) { 1950 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1951 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1952 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1953 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1954 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1955 total_counts++; 1956 used_vertex = PETSC_TRUE; 1957 } 1958 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1959 k++; 1960 } 1961 } 1962 } 1963 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1964 n_vertices = total_counts; 1965 } 1966 1967 /* edges and faces */ 1968 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1969 if (ncc<n_ISForEdges) { 1970 used_IS = &ISForEdges[ncc]; 1971 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1972 } else { 1973 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1974 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1975 } 1976 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1977 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1978 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1979 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1980 /* change of basis should not be performed on local periodic nodes */ 1981 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1982 if (nnsp_has_cnst) { 1983 PetscScalar quad_value; 1984 temp_constraints++; 1985 if (!pcbddc->use_nnsp_true) { 1986 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1987 } else { 1988 quad_value = 1.0; 1989 } 1990 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1991 for (j=0;j<size_of_constraint;j++) { 1992 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1993 } 1994 /* sort by global ordering if using lapack subroutines (not needed!) */ 1995 if (!skip_lapack || pcbddc->use_qr_single) { 1996 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1997 for (j=0;j<size_of_constraint;j++) { 1998 permutation[j]=j; 1999 } 2000 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2001 for (j=0;j<size_of_constraint;j++) { 2002 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 2003 } 2004 for (j=0;j<size_of_constraint;j++) { 2005 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2006 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2007 } 2008 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2009 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2010 } 2011 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2012 total_counts++; 2013 } 2014 for (k=0;k<nnsp_size;k++) { 2015 PetscReal real_value; 2016 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2017 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2018 for (j=0;j<size_of_constraint;j++) { 2019 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2020 } 2021 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2022 /* check if array is null on the connected component */ 2023 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2024 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2025 if (real_value > 0.0) { /* keep indices and values */ 2026 /* sort by global ordering if using lapack subroutines */ 2027 if (!skip_lapack || pcbddc->use_qr_single) { 2028 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2029 for (j=0;j<size_of_constraint;j++) { 2030 permutation[j]=j; 2031 } 2032 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2033 for (j=0;j<size_of_constraint;j++) { 2034 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2035 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2036 } 2037 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2038 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2039 } 2040 temp_constraints++; 2041 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2042 total_counts++; 2043 } 2044 } 2045 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2046 valid_constraints = temp_constraints; 2047 if (!pcbddc->use_nnsp_true && temp_constraints) { 2048 if (temp_constraints == 1) { /* just normalize the constraint */ 2049 PetscScalar norm; 2050 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2051 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2052 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2053 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2054 } else { /* perform SVD */ 2055 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2056 2057 #if defined(PETSC_MISSING_LAPACK_GESVD) 2058 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2059 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2060 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2061 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2062 from that computed using LAPACKgesvd 2063 -> This is due to a different computation of eigenvectors in LAPACKheev 2064 -> The quality of the POD-computed basis will be the same */ 2065 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2066 /* Store upper triangular part of correlation matrix */ 2067 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2068 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2069 for (j=0;j<temp_constraints;j++) { 2070 for (k=0;k<j+1;k++) { 2071 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one)); 2072 } 2073 } 2074 /* compute eigenvalues and eigenvectors of correlation matrix */ 2075 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2076 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2077 #if !defined(PETSC_USE_COMPLEX) 2078 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2079 #else 2080 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2081 #endif 2082 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2083 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2084 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2085 j = 0; 2086 while (j < temp_constraints && singular_vals[j] < tol) j++; 2087 total_counts = total_counts-j; 2088 valid_constraints = temp_constraints-j; 2089 /* scale and copy POD basis into used quadrature memory */ 2090 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2091 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2092 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2093 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2094 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2095 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2096 if (j<temp_constraints) { 2097 PetscInt ii; 2098 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2099 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2100 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2101 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2102 for (k=0;k<temp_constraints-j;k++) { 2103 for (ii=0;ii<size_of_constraint;ii++) { 2104 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2105 } 2106 } 2107 } 2108 #else /* on missing GESVD */ 2109 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2110 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2111 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2112 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2113 #if !defined(PETSC_USE_COMPLEX) 2114 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2115 #else 2116 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2117 #endif 2118 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2119 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2120 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2121 k = temp_constraints; 2122 if (k > size_of_constraint) k = size_of_constraint; 2123 j = 0; 2124 while (j < k && singular_vals[k-j-1] < tol) j++; 2125 valid_constraints = k-j; 2126 total_counts = total_counts-temp_constraints+valid_constraints; 2127 #endif /* on missing GESVD */ 2128 } 2129 } 2130 /* setting change_of_basis flag is safe now */ 2131 if (boolforchange) { 2132 for (j=0;j<valid_constraints;j++) { 2133 PetscBTSet(change_basis,total_counts-j-1); 2134 } 2135 } 2136 } 2137 /* free workspace */ 2138 if (!skip_lapack || pcbddc->use_qr_single) { 2139 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2140 } 2141 if (!skip_lapack) { 2142 ierr = PetscFree(work);CHKERRQ(ierr); 2143 #if defined(PETSC_USE_COMPLEX) 2144 ierr = PetscFree(rwork);CHKERRQ(ierr); 2145 #endif 2146 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2147 #if defined(PETSC_MISSING_LAPACK_GESVD) 2148 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2149 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2150 #endif 2151 } 2152 for (k=0;k<nnsp_size;k++) { 2153 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2154 } 2155 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2156 /* free index sets of faces, edges and vertices */ 2157 for (i=0;i<n_ISForFaces;i++) { 2158 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2159 } 2160 if (n_ISForFaces) { 2161 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2162 } 2163 for (i=0;i<n_ISForEdges;i++) { 2164 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2165 } 2166 if (n_ISForEdges) { 2167 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2168 } 2169 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2170 } else { 2171 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2172 PetscInt cum = 0; 2173 2174 total_counts = 0; 2175 n_vertices = 0; 2176 if (sub_schurs->is_Ej_com) { 2177 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2178 } 2179 max_constraints = 0; 2180 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2181 total_counts += pcbddc->adaptive_constraints_n[i]; 2182 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2183 } 2184 temp_indices = pcbddc->adaptive_constraints_ptrs; 2185 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2186 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2187 2188 #if 0 2189 printf("Found %d totals\n",total_counts); 2190 for (i=0;i<total_counts;i++) { 2191 printf("const %d, start %d",i,temp_indices[i]); 2192 printf(" end %d:\n",temp_indices[i+1]); 2193 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2194 printf(" idxs %d",temp_indices_to_constraint[j]); 2195 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2196 } 2197 } 2198 for (i=0;i<n_vertices;i++) { 2199 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]); 2200 } 2201 for (i=0;i<sub_schurs->n_subs;i++) { 2202 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2203 } 2204 #endif 2205 2206 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2207 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2208 /* Change of basis */ 2209 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2210 if (pcbddc->use_change_of_basis) { 2211 cum = n_vertices; 2212 for (i=0;i<sub_schurs->n_subs;i++) { 2213 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2214 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2215 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2216 } 2217 } 2218 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2219 } 2220 } 2221 } 2222 2223 /* map temp_indices_to_constraint in boundary numbering */ 2224 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2225 if (i != temp_indices[total_counts]) { 2226 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2227 } 2228 2229 /* set quantities in pcbddc data structure and store previous primal size */ 2230 /* n_vertices defines the number of subdomain corners in the primal space */ 2231 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2232 olocal_primal_size = pcbddc->local_primal_size; 2233 pcbddc->local_primal_size = total_counts; 2234 pcbddc->n_vertices = n_vertices; 2235 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2236 2237 /* Create constraint matrix */ 2238 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2239 /* so we need to set it up properly either with or without change of basis */ 2240 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2241 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2242 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2243 /* array to compute a local numbering of constraints : vertices first then constraints */ 2244 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2245 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2246 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2247 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2248 /* auxiliary stuff for basis change */ 2249 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2250 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2251 2252 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2253 total_primal_vertices=0; 2254 for (i=0;i<pcbddc->local_primal_size;i++) { 2255 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2256 if (size_of_constraint == 1) { 2257 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2258 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2259 aux_primal_minloc[total_primal_vertices]=0; 2260 total_primal_vertices++; 2261 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2262 PetscInt min_loc,min_index; 2263 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2264 /* find first untouched local node */ 2265 k = 0; 2266 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2267 min_index = global_indices[k]; 2268 min_loc = k; 2269 /* search the minimum among global nodes already untouched on the cc */ 2270 for (k=1;k<size_of_constraint;k++) { 2271 /* there can be more than one constraint on a single connected component */ 2272 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2273 min_index = global_indices[k]; 2274 min_loc = k; 2275 } 2276 } 2277 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2278 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2279 aux_primal_minloc[total_primal_vertices]=min_loc; 2280 total_primal_vertices++; 2281 } 2282 } 2283 /* determine if a QR strategy is needed for change of basis */ 2284 qr_needed = PETSC_FALSE; 2285 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2286 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2287 if (PetscBTLookup(change_basis,i)) { 2288 if (!pcbddc->use_qr_single) { 2289 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2290 j = 0; 2291 for (k=0;k<size_of_constraint;k++) { 2292 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2293 j++; 2294 } 2295 } 2296 /* found more than one primal dof on the cc */ 2297 if (j > 1) { 2298 PetscBTSet(qr_needed_idx,i); 2299 qr_needed = PETSC_TRUE; 2300 } 2301 } else { 2302 PetscBTSet(qr_needed_idx,i); 2303 qr_needed = PETSC_TRUE; 2304 } 2305 } 2306 } 2307 /* free workspace */ 2308 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2309 2310 /* permute indices in order to have a sorted set of vertices */ 2311 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2312 2313 /* nonzero structure of constraint matrix */ 2314 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2315 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2316 j=total_primal_vertices; 2317 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2318 if (!PetscBTLookup(change_basis,i)) { 2319 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2320 j++; 2321 } 2322 } 2323 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2324 ierr = PetscFree(nnz);CHKERRQ(ierr); 2325 /* set values in constraint matrix */ 2326 for (i=0;i<total_primal_vertices;i++) { 2327 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2328 } 2329 total_counts = total_primal_vertices; 2330 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2331 if (!PetscBTLookup(change_basis,i)) { 2332 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2333 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2334 total_counts++; 2335 } 2336 } 2337 /* assembling */ 2338 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2339 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2340 /* 2341 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2342 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2343 */ 2344 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2345 if (pcbddc->use_change_of_basis) { 2346 /* dual and primal dofs on a single cc */ 2347 PetscInt dual_dofs,primal_dofs; 2348 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2349 PetscInt primal_counter; 2350 /* working stuff for GEQRF */ 2351 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2352 PetscBLASInt lqr_work; 2353 /* working stuff for UNGQR */ 2354 PetscScalar *gqr_work,lgqr_work_t; 2355 PetscBLASInt lgqr_work; 2356 /* working stuff for TRTRS */ 2357 PetscScalar *trs_rhs; 2358 PetscBLASInt Blas_NRHS; 2359 /* pointers for values insertion into change of basis matrix */ 2360 PetscInt *start_rows,*start_cols; 2361 PetscScalar *start_vals; 2362 /* working stuff for values insertion */ 2363 PetscBT is_primal; 2364 /* matrix sizes */ 2365 PetscInt global_size,local_size; 2366 /* temporary change of basis */ 2367 Mat localChangeOfBasisMatrix; 2368 /* extra space for debugging */ 2369 PetscScalar *dbg_work; 2370 2371 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2372 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2373 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2374 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2375 /* nonzeros for local mat */ 2376 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2377 for (i=0;i<pcis->n;i++) nnz[i]=1; 2378 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2379 if (PetscBTLookup(change_basis,i)) { 2380 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2381 if (PetscBTLookup(qr_needed_idx,i)) { 2382 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2383 } else { 2384 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2; 2385 /* get local primal index on the cc */ 2386 j = 0; 2387 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2388 nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2389 } 2390 } 2391 } 2392 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2393 ierr = PetscFree(nnz);CHKERRQ(ierr); 2394 /* Set initial identity in the matrix */ 2395 for (i=0;i<pcis->n;i++) { 2396 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2397 } 2398 2399 if (pcbddc->dbg_flag) { 2400 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2401 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2402 } 2403 2404 2405 /* Now we loop on the constraints which need a change of basis */ 2406 /* 2407 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2408 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2409 2410 Basic blocks of change of basis matrix T computed by 2411 2412 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2413 2414 | 1 0 ... 0 s_1/S | 2415 | 0 1 ... 0 s_2/S | 2416 | ... | 2417 | 0 ... 1 s_{n-1}/S | 2418 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2419 2420 with S = \sum_{i=1}^n s_i^2 2421 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2422 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2423 2424 - QR decomposition of constraints otherwise 2425 */ 2426 if (qr_needed) { 2427 /* space to store Q */ 2428 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2429 /* first we issue queries for optimal work */ 2430 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2431 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2432 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2433 lqr_work = -1; 2434 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2435 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2436 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2437 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2438 lgqr_work = -1; 2439 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2440 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2441 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2442 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2443 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2444 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2445 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2446 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2447 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2448 /* array to store scaling factors for reflectors */ 2449 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2450 /* array to store rhs and solution of triangular solver */ 2451 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2452 /* allocating workspace for check */ 2453 if (pcbddc->dbg_flag) { 2454 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2455 } 2456 } 2457 /* array to store whether a node is primal or not */ 2458 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2459 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2460 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2461 if (i != total_primal_vertices) { 2462 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2463 } 2464 for (i=0;i<total_primal_vertices;i++) { 2465 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2466 } 2467 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2468 2469 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2470 /* -> using implicit ordering contained in temp_indices data */ 2471 total_counts = pcbddc->n_vertices; 2472 primal_counter = total_counts; 2473 while (total_counts<pcbddc->local_primal_size) { 2474 primal_dofs = 1; 2475 if (PetscBTLookup(change_basis,total_counts)) { 2476 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2477 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) { 2478 primal_dofs++; 2479 } 2480 /* get constraint info */ 2481 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2482 dual_dofs = size_of_constraint-primal_dofs; 2483 2484 if (pcbddc->dbg_flag) { 2485 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2486 } 2487 2488 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2489 2490 /* copy quadrature constraints for change of basis check */ 2491 if (pcbddc->dbg_flag) { 2492 ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2493 } 2494 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2495 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2496 2497 /* compute QR decomposition of constraints */ 2498 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2499 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2500 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2501 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2502 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2503 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2504 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2505 2506 /* explictly compute R^-T */ 2507 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2508 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2509 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2510 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2511 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2512 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2513 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2514 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2515 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2516 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2517 2518 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2519 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2520 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2521 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2522 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2523 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2524 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2525 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2526 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2527 2528 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2529 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2530 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2531 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2532 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2533 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2534 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2535 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2537 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2538 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2539 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2540 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2541 2542 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2543 start_rows = &temp_indices_to_constraint[temp_indices[total_counts]]; 2544 /* insert cols for primal dofs */ 2545 for (j=0;j<primal_dofs;j++) { 2546 start_vals = &qr_basis[j*size_of_constraint]; 2547 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2548 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2549 } 2550 /* insert cols for dual dofs */ 2551 for (j=0,k=0;j<dual_dofs;k++) { 2552 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2553 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2554 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k]; 2555 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2556 j++; 2557 } 2558 } 2559 2560 /* check change of basis */ 2561 if (pcbddc->dbg_flag) { 2562 PetscInt ii,jj; 2563 PetscBool valid_qr=PETSC_TRUE; 2564 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2565 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2566 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2567 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2568 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2569 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2570 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2571 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2572 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2573 for (jj=0;jj<size_of_constraint;jj++) { 2574 for (ii=0;ii<primal_dofs;ii++) { 2575 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2576 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2577 } 2578 } 2579 if (!valid_qr) { 2580 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2581 for (jj=0;jj<size_of_constraint;jj++) { 2582 for (ii=0;ii<primal_dofs;ii++) { 2583 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2584 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2585 } 2586 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2587 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2588 } 2589 } 2590 } 2591 } else { 2592 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2593 } 2594 } 2595 } else { /* simple transformation block */ 2596 PetscInt row,col; 2597 PetscScalar val,norm; 2598 2599 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2600 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2601 for (j=0;j<size_of_constraint;j++) { 2602 PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2603 row = temp_indices_to_constraint[temp_indices[total_counts]+j]; 2604 if (!PetscBTLookup(is_primal,row_B)) { 2605 col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2606 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2607 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2608 } else { 2609 for (k=0;k<size_of_constraint;k++) { 2610 col = temp_indices_to_constraint[temp_indices[total_counts]+k]; 2611 if (row != col) { 2612 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2613 } else { 2614 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2615 } 2616 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2617 } 2618 } 2619 } 2620 if (pcbddc->dbg_flag) { 2621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2622 } 2623 } 2624 /* increment primal counter */ 2625 primal_counter += primal_dofs; 2626 } else { 2627 if (pcbddc->dbg_flag) { 2628 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2629 } 2630 } 2631 /* increment constraint counter total_counts */ 2632 total_counts += primal_dofs; 2633 } 2634 2635 /* free workspace */ 2636 if (qr_needed) { 2637 if (pcbddc->dbg_flag) { 2638 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2639 } 2640 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2641 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2642 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2643 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2644 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2645 } 2646 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2647 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2648 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2649 2650 /* assembling of global change of variable */ 2651 { 2652 Mat tmat; 2653 PetscInt bs; 2654 2655 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2656 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2657 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2658 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2659 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2660 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2661 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2662 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2663 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2664 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2665 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2666 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2667 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2668 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2669 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2670 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2671 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2672 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2673 } 2674 /* check */ 2675 if (pcbddc->dbg_flag) { 2676 PetscReal error; 2677 Vec x,x_change; 2678 2679 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2680 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2681 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2682 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2683 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2684 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2685 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2686 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2687 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2688 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2689 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2690 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2691 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2692 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2693 ierr = VecDestroy(&x);CHKERRQ(ierr); 2694 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2695 } 2696 2697 /* adapt sub_schurs computed (if any) */ 2698 if (pcbddc->use_deluxe_scaling) { 2699 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2700 if (sub_schurs->n_subs_par_g) { 2701 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented"); 2702 } 2703 if (sub_schurs->S_Ej_all) { 2704 Mat S_1,S_2,tmat; 2705 IS is_all_N; 2706 2707 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2708 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr); 2709 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2710 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2711 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2712 sub_schurs->S_Ej_all = S_1; 2713 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2714 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2715 sub_schurs->sum_S_Ej_all = S_2; 2716 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2717 } 2718 } 2719 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2720 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2721 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2722 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2723 } 2724 2725 /* set up change of basis context */ 2726 if (pcbddc->ChangeOfBasisMatrix) { 2727 PCBDDCChange_ctx change_ctx; 2728 2729 if (!pcbddc->new_global_mat) { 2730 PetscInt global_size,local_size; 2731 2732 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2733 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2734 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2735 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2736 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2737 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2738 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2739 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2740 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2741 } else { 2742 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2743 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2744 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2745 } 2746 if (!pcbddc->user_ChangeOfBasisMatrix) { 2747 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2748 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2749 } else { 2750 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2751 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2752 } 2753 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2754 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2755 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2756 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2757 } 2758 2759 /* get indices in local ordering for vertices and constraints */ 2760 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2761 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2762 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2763 } 2764 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2765 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2766 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2767 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2768 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2769 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2770 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2771 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2772 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2773 /* set quantities in PCBDDC data struct */ 2774 pcbddc->n_actual_vertices = i; 2775 /* check if a new primal space has been introduced */ 2776 pcbddc->new_primal_space_local = PETSC_TRUE; 2777 if (olocal_primal_size == pcbddc->local_primal_size) { 2778 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2779 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2780 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2781 } 2782 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2783 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2784 2785 /* flush dbg viewer */ 2786 if (pcbddc->dbg_flag) { 2787 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2788 } 2789 2790 /* free workspace */ 2791 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2792 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2793 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2794 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2795 if (!pcbddc->adaptive_selection) { 2796 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2797 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2798 } else { 2799 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2800 pcbddc->adaptive_constraints_ptrs, 2801 pcbddc->adaptive_constraints_idxs, 2802 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2803 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2804 } 2805 PetscFunctionReturn(0); 2806 } 2807 2808 #undef __FUNCT__ 2809 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2810 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2811 { 2812 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2813 PC_IS *pcis = (PC_IS*)pc->data; 2814 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2815 PetscInt ierr,i,vertex_size; 2816 PetscViewer viewer=pcbddc->dbg_viewer; 2817 2818 PetscFunctionBegin; 2819 /* Reset previously computed graph */ 2820 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2821 /* Init local Graph struct */ 2822 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2823 2824 /* Check validity of the csr graph passed in by the user */ 2825 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2826 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2827 } 2828 2829 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2830 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2831 PetscInt *xadj,*adjncy; 2832 PetscInt nvtxs; 2833 2834 if (pcbddc->use_local_adj) { 2835 PetscBool flg_row=PETSC_FALSE; 2836 2837 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2838 if (flg_row) { 2839 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2840 pcbddc->computed_rowadj = PETSC_TRUE; 2841 } 2842 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2843 } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */ 2844 IS is_dummy; 2845 ISLocalToGlobalMapping l2gmap_dummy; 2846 PetscInt j,sum; 2847 PetscInt *cxadj,*cadjncy; 2848 const PetscInt *idxs; 2849 PCBDDCGraph graph; 2850 PetscBT is_on_boundary; 2851 2852 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2853 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2854 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2855 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2856 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2857 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2858 graph->xadj = xadj; 2859 graph->adjncy = adjncy; 2860 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2861 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2862 2863 if (pcbddc->dbg_flag) { 2864 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2865 for (i=0;i<graph->ncc;i++) { 2866 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2867 } 2868 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2869 } 2870 2871 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2872 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2873 for (i=0;i<pcis->n_B;i++) { 2874 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2875 } 2876 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2877 2878 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2879 sum = 0; 2880 for (i=0;i<graph->ncc;i++) { 2881 PetscInt sizecc = 0; 2882 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2883 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2884 sizecc++; 2885 } 2886 } 2887 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2888 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2889 cxadj[graph->queue[j]] = sizecc; 2890 } 2891 } 2892 sum += sizecc*sizecc; 2893 } 2894 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2895 sum = 0; 2896 for (i=0;i<nvtxs;i++) { 2897 PetscInt temp = cxadj[i]; 2898 cxadj[i] = sum; 2899 sum += temp; 2900 } 2901 cxadj[nvtxs] = sum; 2902 for (i=0;i<graph->ncc;i++) { 2903 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2904 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2905 PetscInt k,sizecc = 0; 2906 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2907 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2908 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2909 sizecc++; 2910 } 2911 } 2912 } 2913 } 2914 } 2915 if (nvtxs) { 2916 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2917 } else { 2918 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2919 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2920 } 2921 graph->xadj = 0; 2922 graph->adjncy = 0; 2923 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2924 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2925 } 2926 } 2927 2928 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2929 vertex_size = 1; 2930 if (pcbddc->user_provided_isfordofs) { 2931 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2932 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2933 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2934 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2935 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2936 } 2937 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2938 pcbddc->n_ISForDofs = 0; 2939 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2940 } 2941 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2942 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2943 } else { 2944 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2945 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2946 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2947 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2948 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2949 } 2950 } 2951 } 2952 2953 /* Setup of Graph */ 2954 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2955 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2956 } 2957 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2958 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2959 } 2960 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2961 2962 /* Graph's connected components analysis */ 2963 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2964 2965 /* print some info to stdout */ 2966 if (pcbddc->dbg_flag) { 2967 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2968 } 2969 2970 /* mark topography has done */ 2971 pcbddc->recompute_topography = PETSC_FALSE; 2972 PetscFunctionReturn(0); 2973 } 2974 2975 #undef __FUNCT__ 2976 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2977 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2978 { 2979 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2980 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2981 PetscErrorCode ierr; 2982 2983 PetscFunctionBegin; 2984 n = 0; 2985 vertices = 0; 2986 if (pcbddc->ConstraintMatrix) { 2987 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2988 for (i=0;i<local_primal_size;i++) { 2989 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2990 if (size_of_constraint == 1) n++; 2991 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2992 } 2993 if (vertices_idx) { 2994 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2995 n = 0; 2996 for (i=0;i<local_primal_size;i++) { 2997 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2998 if (size_of_constraint == 1) { 2999 vertices[n++]=row_cmat_indices[0]; 3000 } 3001 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3002 } 3003 } 3004 } 3005 *n_vertices = n; 3006 if (vertices_idx) *vertices_idx = vertices; 3007 PetscFunctionReturn(0); 3008 } 3009 3010 #undef __FUNCT__ 3011 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 3012 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 3013 { 3014 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3015 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 3016 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 3017 PetscBT touched; 3018 PetscErrorCode ierr; 3019 3020 /* This function assumes that the number of local constraints per connected component 3021 is not greater than the number of nodes defined for the connected component 3022 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3023 PetscFunctionBegin; 3024 n = 0; 3025 constraints_index = 0; 3026 if (pcbddc->ConstraintMatrix) { 3027 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3028 max_size_of_constraint = 0; 3029 for (i=0;i<local_primal_size;i++) { 3030 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3031 if (size_of_constraint > 1) { 3032 n++; 3033 } 3034 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3035 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3036 } 3037 if (constraints_idx) { 3038 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3039 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3040 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3041 n = 0; 3042 for (i=0;i<local_primal_size;i++) { 3043 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3044 if (size_of_constraint > 1) { 3045 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3046 /* find first untouched local node */ 3047 j = 0; 3048 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3049 min_index = row_cmat_global_indices[j]; 3050 min_loc = j; 3051 /* search the minimum among nodes not yet touched on the connected component 3052 since there can be more than one constraint on a single cc */ 3053 for (j=1;j<size_of_constraint;j++) { 3054 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3055 min_index = row_cmat_global_indices[j]; 3056 min_loc = j; 3057 } 3058 } 3059 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3060 constraints_index[n++] = row_cmat_indices[min_loc]; 3061 } 3062 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3063 } 3064 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3065 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3066 } 3067 } 3068 *n_constraints = n; 3069 if (constraints_idx) *constraints_idx = constraints_index; 3070 PetscFunctionReturn(0); 3071 } 3072 3073 #undef __FUNCT__ 3074 #define __FUNCT__ "PCBDDCSubsetNumbering" 3075 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3076 { 3077 Vec local_vec,global_vec; 3078 IS seqis,paris; 3079 VecScatter scatter_ctx; 3080 PetscScalar *array; 3081 PetscInt *temp_global_dofs; 3082 PetscScalar globalsum; 3083 PetscInt i,j,s; 3084 PetscInt nlocals,first_index,old_index,max_local; 3085 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 3086 PetscMPIInt *dof_sizes,*dof_displs; 3087 PetscBool first_found; 3088 PetscErrorCode ierr; 3089 3090 PetscFunctionBegin; 3091 /* mpi buffers */ 3092 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3093 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3094 j = ( !rank_prec_comm ? size_prec_comm : 0); 3095 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 3096 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 3097 /* get maximum size of subset */ 3098 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3099 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3100 max_local = 0; 3101 for (i=0;i<n_local_dofs;i++) { 3102 if (max_local < temp_global_dofs[i] ) { 3103 max_local = temp_global_dofs[i]; 3104 } 3105 } 3106 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3107 max_global++; 3108 max_local = 0; 3109 for (i=0;i<n_local_dofs;i++) { 3110 if (max_local < local_dofs[i] ) { 3111 max_local = local_dofs[i]; 3112 } 3113 } 3114 max_local++; 3115 /* allocate workspace */ 3116 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3117 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3118 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3119 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3120 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3121 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3122 /* create scatter */ 3123 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3124 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3125 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3126 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3127 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3128 /* init array */ 3129 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3130 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3131 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3132 if (local_dofs_mult) { 3133 for (i=0;i<n_local_dofs;i++) { 3134 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3135 } 3136 } else { 3137 for (i=0;i<n_local_dofs;i++) { 3138 array[local_dofs[i]]=1.0; 3139 } 3140 } 3141 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3142 /* scatter into global vec and get total number of global dofs */ 3143 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3144 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3145 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3146 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3147 /* Fill global_vec with cumulative function for global numbering */ 3148 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3149 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3150 nlocals = 0; 3151 first_index = -1; 3152 first_found = PETSC_FALSE; 3153 for (i=0;i<s;i++) { 3154 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3155 first_found = PETSC_TRUE; 3156 first_index = i; 3157 } 3158 nlocals += (PetscInt)PetscRealPart(array[i]); 3159 } 3160 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3161 if (!rank_prec_comm) { 3162 dof_displs[0]=0; 3163 for (i=1;i<size_prec_comm;i++) { 3164 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3165 } 3166 } 3167 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3168 if (first_found) { 3169 array[first_index] += (PetscScalar)nlocals; 3170 old_index = first_index; 3171 for (i=first_index+1;i<s;i++) { 3172 if (PetscRealPart(array[i]) > 0.1) { 3173 array[i] += array[old_index]; 3174 old_index = i; 3175 } 3176 } 3177 } 3178 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3179 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3180 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3181 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3182 /* get global ordering of local dofs */ 3183 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3184 if (local_dofs_mult) { 3185 for (i=0;i<n_local_dofs;i++) { 3186 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3187 } 3188 } else { 3189 for (i=0;i<n_local_dofs;i++) { 3190 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3191 } 3192 } 3193 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3194 /* free workspace */ 3195 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3196 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3197 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3198 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3199 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3200 /* return pointer to global ordering of local dofs */ 3201 *global_numbering_subset = temp_global_dofs; 3202 PetscFunctionReturn(0); 3203 } 3204 3205 #undef __FUNCT__ 3206 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3207 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3208 { 3209 PetscInt i,j; 3210 PetscScalar *alphas; 3211 PetscErrorCode ierr; 3212 3213 PetscFunctionBegin; 3214 /* this implements stabilized Gram-Schmidt */ 3215 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3216 for (i=0;i<n;i++) { 3217 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3218 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3219 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3220 } 3221 ierr = PetscFree(alphas);CHKERRQ(ierr); 3222 PetscFunctionReturn(0); 3223 } 3224 3225 #undef __FUNCT__ 3226 #define __FUNCT__ "MatISGetSubassemblingPattern" 3227 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3228 { 3229 Mat subdomain_adj; 3230 IS new_ranks,ranks_send_to; 3231 MatPartitioning partitioner; 3232 Mat_IS *matis; 3233 PetscInt n_neighs,*neighs,*n_shared,**shared; 3234 PetscInt prank; 3235 PetscMPIInt size,rank,color; 3236 PetscInt *xadj,*adjncy,*oldranks; 3237 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3238 PetscInt i,local_size,threshold=0; 3239 PetscErrorCode ierr; 3240 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3241 PetscSubcomm subcomm; 3242 3243 PetscFunctionBegin; 3244 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3245 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3246 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3247 3248 /* Get info on mapping */ 3249 matis = (Mat_IS*)(mat->data); 3250 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3251 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3252 3253 /* build local CSR graph of subdomains' connectivity */ 3254 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3255 xadj[0] = 0; 3256 xadj[1] = PetscMax(n_neighs-1,0); 3257 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3258 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3259 3260 if (threshold) { 3261 PetscInt xadj_count = 0; 3262 for (i=1;i<n_neighs;i++) { 3263 if (n_shared[i] > threshold) { 3264 adjncy[xadj_count] = neighs[i]; 3265 adjncy_wgt[xadj_count] = n_shared[i]; 3266 xadj_count++; 3267 } 3268 } 3269 xadj[1] = xadj_count; 3270 } else { 3271 if (xadj[1]) { 3272 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3273 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3274 } 3275 } 3276 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3277 if (use_square) { 3278 for (i=0;i<xadj[1];i++) { 3279 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3280 } 3281 } 3282 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3283 3284 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3285 3286 /* 3287 Restrict work on active processes only. 3288 */ 3289 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3290 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3291 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3292 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3293 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3294 if (color) { 3295 ierr = PetscFree(xadj);CHKERRQ(ierr); 3296 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3297 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3298 } else { 3299 PetscInt coarsening_ratio; 3300 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 3301 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3302 prank = rank; 3303 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 3304 /* 3305 for (i=0;i<size;i++) { 3306 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3307 } 3308 */ 3309 for (i=0;i<xadj[1];i++) { 3310 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3311 } 3312 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3313 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3314 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3315 3316 /* Partition */ 3317 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 3318 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3319 if (use_vwgt) { 3320 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3321 v_wgt[0] = local_size; 3322 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3323 } 3324 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3325 coarsening_ratio = size/n_subdomains; 3326 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3327 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3328 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3329 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3330 3331 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3332 if (contiguous) { 3333 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3334 } else { 3335 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3336 } 3337 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3338 /* clean up */ 3339 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3340 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3341 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3342 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3343 } 3344 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3345 3346 /* assemble parallel IS for sends */ 3347 i = 1; 3348 if (color) i=0; 3349 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3350 3351 /* get back IS */ 3352 *is_sends = ranks_send_to; 3353 PetscFunctionReturn(0); 3354 } 3355 3356 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3357 3358 #undef __FUNCT__ 3359 #define __FUNCT__ "MatISSubassemble" 3360 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3361 { 3362 Mat local_mat; 3363 Mat_IS *matis; 3364 IS is_sends_internal; 3365 PetscInt rows,cols,new_local_rows; 3366 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3367 PetscBool ismatis,isdense,newisdense,destroy_mat; 3368 ISLocalToGlobalMapping l2gmap; 3369 PetscInt* l2gmap_indices; 3370 const PetscInt* is_indices; 3371 MatType new_local_type; 3372 /* buffers */ 3373 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3374 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3375 PetscInt *recv_buffer_idxs_local; 3376 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3377 /* MPI */ 3378 MPI_Comm comm,comm_n; 3379 PetscSubcomm subcomm; 3380 PetscMPIInt n_sends,n_recvs,commsize; 3381 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3382 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3383 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3384 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3385 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3386 PetscErrorCode ierr; 3387 3388 PetscFunctionBegin; 3389 /* TODO: add missing checks */ 3390 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3391 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3392 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3393 PetscValidLogicalCollectiveInt(mat,nis,7); 3394 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3395 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3396 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3397 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3398 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3399 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3400 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3401 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3402 PetscInt mrows,mcols,mnrows,mncols; 3403 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3404 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3405 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3406 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3407 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3408 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3409 } 3410 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3411 PetscValidLogicalCollectiveInt(mat,bs,0); 3412 /* prepare IS for sending if not provided */ 3413 if (!is_sends) { 3414 PetscBool pcontig = PETSC_TRUE; 3415 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3416 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3417 } else { 3418 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3419 is_sends_internal = is_sends; 3420 } 3421 3422 /* get pointer of MATIS data */ 3423 matis = (Mat_IS*)mat->data; 3424 3425 /* get comm */ 3426 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3427 3428 /* compute number of sends */ 3429 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3430 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3431 3432 /* compute number of receives */ 3433 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3434 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3435 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3436 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3437 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3438 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3439 ierr = PetscFree(iflags);CHKERRQ(ierr); 3440 3441 /* restrict comm if requested */ 3442 subcomm = 0; 3443 destroy_mat = PETSC_FALSE; 3444 if (restrict_comm) { 3445 PetscMPIInt color,subcommsize; 3446 3447 color = 0; 3448 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3449 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3450 subcommsize = commsize - subcommsize; 3451 /* check if reuse has been requested */ 3452 if (reuse == MAT_REUSE_MATRIX) { 3453 if (*mat_n) { 3454 PetscMPIInt subcommsize2; 3455 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3456 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3457 comm_n = PetscObjectComm((PetscObject)*mat_n); 3458 } else { 3459 comm_n = PETSC_COMM_SELF; 3460 } 3461 } else { /* MAT_INITIAL_MATRIX */ 3462 PetscMPIInt rank; 3463 3464 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3465 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3466 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3467 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3468 comm_n = subcomm->comm; 3469 } 3470 /* flag to destroy *mat_n if not significative */ 3471 if (color) destroy_mat = PETSC_TRUE; 3472 } else { 3473 comm_n = comm; 3474 } 3475 3476 /* prepare send/receive buffers */ 3477 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3478 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3479 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3480 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3481 if (nis) { 3482 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3483 } 3484 3485 /* Get data from local matrices */ 3486 if (!isdense) { 3487 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3488 /* TODO: See below some guidelines on how to prepare the local buffers */ 3489 /* 3490 send_buffer_vals should contain the raw values of the local matrix 3491 send_buffer_idxs should contain: 3492 - MatType_PRIVATE type 3493 - PetscInt size_of_l2gmap 3494 - PetscInt global_row_indices[size_of_l2gmap] 3495 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3496 */ 3497 } else { 3498 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3499 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3500 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3501 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3502 send_buffer_idxs[1] = i; 3503 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3504 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3505 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3506 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3507 for (i=0;i<n_sends;i++) { 3508 ilengths_vals[is_indices[i]] = len*len; 3509 ilengths_idxs[is_indices[i]] = len+2; 3510 } 3511 } 3512 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3513 /* additional is (if any) */ 3514 if (nis) { 3515 PetscMPIInt psum; 3516 PetscInt j; 3517 for (j=0,psum=0;j<nis;j++) { 3518 PetscInt plen; 3519 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3520 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3521 psum += len+1; /* indices + lenght */ 3522 } 3523 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3524 for (j=0,psum=0;j<nis;j++) { 3525 PetscInt plen; 3526 const PetscInt *is_array_idxs; 3527 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3528 send_buffer_idxs_is[psum] = plen; 3529 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3530 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3531 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3532 psum += plen+1; /* indices + lenght */ 3533 } 3534 for (i=0;i<n_sends;i++) { 3535 ilengths_idxs_is[is_indices[i]] = psum; 3536 } 3537 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3538 } 3539 3540 buf_size_idxs = 0; 3541 buf_size_vals = 0; 3542 buf_size_idxs_is = 0; 3543 for (i=0;i<n_recvs;i++) { 3544 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3545 buf_size_vals += (PetscInt)olengths_vals[i]; 3546 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3547 } 3548 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3549 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3550 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3551 3552 /* get new tags for clean communications */ 3553 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3554 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3555 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3556 3557 /* allocate for requests */ 3558 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3559 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3560 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3561 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3562 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3563 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3564 3565 /* communications */ 3566 ptr_idxs = recv_buffer_idxs; 3567 ptr_vals = recv_buffer_vals; 3568 ptr_idxs_is = recv_buffer_idxs_is; 3569 for (i=0;i<n_recvs;i++) { 3570 source_dest = onodes[i]; 3571 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3572 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3573 ptr_idxs += olengths_idxs[i]; 3574 ptr_vals += olengths_vals[i]; 3575 if (nis) { 3576 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3577 ptr_idxs_is += olengths_idxs_is[i]; 3578 } 3579 } 3580 for (i=0;i<n_sends;i++) { 3581 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3582 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3583 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3584 if (nis) { 3585 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3586 } 3587 } 3588 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3589 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3590 3591 /* assemble new l2g map */ 3592 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3593 ptr_idxs = recv_buffer_idxs; 3594 new_local_rows = 0; 3595 for (i=0;i<n_recvs;i++) { 3596 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3597 ptr_idxs += olengths_idxs[i]; 3598 } 3599 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3600 ptr_idxs = recv_buffer_idxs; 3601 new_local_rows = 0; 3602 for (i=0;i<n_recvs;i++) { 3603 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3604 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3605 ptr_idxs += olengths_idxs[i]; 3606 } 3607 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3608 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3609 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3610 3611 /* infer new local matrix type from received local matrices type */ 3612 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3613 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3614 if (n_recvs) { 3615 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3616 ptr_idxs = recv_buffer_idxs; 3617 for (i=0;i<n_recvs;i++) { 3618 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3619 new_local_type_private = MATAIJ_PRIVATE; 3620 break; 3621 } 3622 ptr_idxs += olengths_idxs[i]; 3623 } 3624 switch (new_local_type_private) { 3625 case MATDENSE_PRIVATE: 3626 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3627 new_local_type = MATSEQAIJ; 3628 bs = 1; 3629 } else { /* if I receive only 1 dense matrix */ 3630 new_local_type = MATSEQDENSE; 3631 bs = 1; 3632 } 3633 break; 3634 case MATAIJ_PRIVATE: 3635 new_local_type = MATSEQAIJ; 3636 bs = 1; 3637 break; 3638 case MATBAIJ_PRIVATE: 3639 new_local_type = MATSEQBAIJ; 3640 break; 3641 case MATSBAIJ_PRIVATE: 3642 new_local_type = MATSEQSBAIJ; 3643 break; 3644 default: 3645 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3646 break; 3647 } 3648 } else { /* by default, new_local_type is seqdense */ 3649 new_local_type = MATSEQDENSE; 3650 bs = 1; 3651 } 3652 3653 /* create MATIS object if needed */ 3654 if (reuse == MAT_INITIAL_MATRIX) { 3655 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3656 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3657 } else { 3658 /* it also destroys the local matrices */ 3659 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3660 } 3661 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3662 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3663 3664 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3665 3666 /* Global to local map of received indices */ 3667 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3668 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3669 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3670 3671 /* restore attributes -> type of incoming data and its size */ 3672 buf_size_idxs = 0; 3673 for (i=0;i<n_recvs;i++) { 3674 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3675 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3676 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3677 } 3678 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3679 3680 /* set preallocation */ 3681 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3682 if (!newisdense) { 3683 PetscInt *new_local_nnz=0; 3684 3685 ptr_vals = recv_buffer_vals; 3686 ptr_idxs = recv_buffer_idxs_local; 3687 if (n_recvs) { 3688 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3689 } 3690 for (i=0;i<n_recvs;i++) { 3691 PetscInt j; 3692 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3693 for (j=0;j<*(ptr_idxs+1);j++) { 3694 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3695 } 3696 } else { 3697 /* TODO */ 3698 } 3699 ptr_idxs += olengths_idxs[i]; 3700 } 3701 if (new_local_nnz) { 3702 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3703 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3704 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3705 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3706 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3707 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3708 } else { 3709 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3710 } 3711 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3712 } else { 3713 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3714 } 3715 3716 /* set values */ 3717 ptr_vals = recv_buffer_vals; 3718 ptr_idxs = recv_buffer_idxs_local; 3719 for (i=0;i<n_recvs;i++) { 3720 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3721 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3722 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3723 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3724 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3725 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3726 } else { 3727 /* TODO */ 3728 } 3729 ptr_idxs += olengths_idxs[i]; 3730 ptr_vals += olengths_vals[i]; 3731 } 3732 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3733 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3734 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3735 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3736 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3737 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3738 3739 #if 0 3740 if (!restrict_comm) { /* check */ 3741 Vec lvec,rvec; 3742 PetscReal infty_error; 3743 3744 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3745 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3746 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3747 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3748 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3749 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3750 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3751 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3752 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3753 } 3754 #endif 3755 3756 /* assemble new additional is (if any) */ 3757 if (nis) { 3758 PetscInt **temp_idxs,*count_is,j,psum; 3759 3760 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3761 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3762 ptr_idxs = recv_buffer_idxs_is; 3763 psum = 0; 3764 for (i=0;i<n_recvs;i++) { 3765 for (j=0;j<nis;j++) { 3766 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3767 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3768 psum += plen; 3769 ptr_idxs += plen+1; /* shift pointer to received data */ 3770 } 3771 } 3772 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3773 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3774 for (i=1;i<nis;i++) { 3775 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3776 } 3777 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3778 ptr_idxs = recv_buffer_idxs_is; 3779 for (i=0;i<n_recvs;i++) { 3780 for (j=0;j<nis;j++) { 3781 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3782 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3783 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3784 ptr_idxs += plen+1; /* shift pointer to received data */ 3785 } 3786 } 3787 for (i=0;i<nis;i++) { 3788 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3789 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3790 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3791 } 3792 ierr = PetscFree(count_is);CHKERRQ(ierr); 3793 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3794 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3795 } 3796 /* free workspace */ 3797 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3798 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3799 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3800 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3801 if (isdense) { 3802 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3803 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3804 } else { 3805 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3806 } 3807 if (nis) { 3808 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3809 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3810 } 3811 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3812 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3813 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3814 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3815 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3816 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3817 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3818 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3819 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3820 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3821 ierr = PetscFree(onodes);CHKERRQ(ierr); 3822 if (nis) { 3823 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3824 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3825 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3826 } 3827 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3828 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3829 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3830 for (i=0;i<nis;i++) { 3831 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3832 } 3833 } 3834 PetscFunctionReturn(0); 3835 } 3836 3837 /* temporary hack into ksp private data structure */ 3838 #include <petsc-private/kspimpl.h> 3839 3840 #undef __FUNCT__ 3841 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3842 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3843 { 3844 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3845 PC_IS *pcis = (PC_IS*)pc->data; 3846 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3847 MatNullSpace CoarseNullSpace=NULL; 3848 ISLocalToGlobalMapping coarse_islg; 3849 IS coarse_is,*isarray; 3850 PetscInt i,im_active=-1,active_procs=-1; 3851 PetscInt nis,nisdofs,nisneu; 3852 PC pc_temp; 3853 PCType coarse_pc_type; 3854 KSPType coarse_ksp_type; 3855 PetscBool multilevel_requested,multilevel_allowed; 3856 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3857 Mat t_coarse_mat_is; 3858 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3859 PetscMPIInt all_procs; 3860 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3861 PetscBool compute_vecs = PETSC_FALSE; 3862 PetscScalar *array; 3863 PetscErrorCode ierr; 3864 3865 PetscFunctionBegin; 3866 /* Assign global numbering to coarse dofs */ 3867 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3868 compute_vecs = PETSC_TRUE; 3869 PetscInt ocoarse_size; 3870 ocoarse_size = pcbddc->coarse_size; 3871 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3872 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3873 /* see if we can avoid some work */ 3874 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3875 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3876 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3877 coarse_reuse = PETSC_FALSE; 3878 } else { /* we can safely reuse already computed coarse matrix */ 3879 coarse_reuse = PETSC_TRUE; 3880 } 3881 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3882 coarse_reuse = PETSC_FALSE; 3883 } 3884 /* reset any subassembling information */ 3885 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3886 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3887 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3888 coarse_reuse = PETSC_TRUE; 3889 } 3890 3891 /* count "active" (i.e. with positive local size) and "void" processes */ 3892 im_active = !!(pcis->n); 3893 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3894 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3895 void_procs = all_procs-active_procs; 3896 csin_type_simple = PETSC_TRUE; 3897 redist = PETSC_FALSE; 3898 if (pcbddc->current_level && void_procs) { 3899 csin_ml = PETSC_TRUE; 3900 ncoarse_ml = void_procs; 3901 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3902 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3903 csin_ds = PETSC_TRUE; 3904 ncoarse_ds = pcbddc->redistribute_coarse; 3905 redist = PETSC_TRUE; 3906 } else { 3907 csin_ds = PETSC_TRUE; 3908 ncoarse_ds = active_procs; 3909 redist = PETSC_TRUE; 3910 } 3911 } else { 3912 csin_ml = PETSC_FALSE; 3913 ncoarse_ml = all_procs; 3914 if (void_procs) { 3915 csin_ds = PETSC_TRUE; 3916 ncoarse_ds = void_procs; 3917 csin_type_simple = PETSC_FALSE; 3918 } else { 3919 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3920 csin_ds = PETSC_TRUE; 3921 ncoarse_ds = pcbddc->redistribute_coarse; 3922 redist = PETSC_TRUE; 3923 } else { 3924 csin_ds = PETSC_FALSE; 3925 ncoarse_ds = all_procs; 3926 } 3927 } 3928 } 3929 3930 /* 3931 test if we can go multilevel: three conditions must be satisfied: 3932 - we have not exceeded the number of levels requested 3933 - we can actually subassemble the active processes 3934 - we can find a suitable number of MPI processes where we can place the subassembled problem 3935 */ 3936 multilevel_allowed = PETSC_FALSE; 3937 multilevel_requested = PETSC_FALSE; 3938 if (pcbddc->current_level < pcbddc->max_levels) { 3939 multilevel_requested = PETSC_TRUE; 3940 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3941 multilevel_allowed = PETSC_FALSE; 3942 } else { 3943 multilevel_allowed = PETSC_TRUE; 3944 } 3945 } 3946 /* determine number of process partecipating to coarse solver */ 3947 if (multilevel_allowed) { 3948 ncoarse = ncoarse_ml; 3949 csin = csin_ml; 3950 redist = PETSC_FALSE; 3951 } else { 3952 ncoarse = ncoarse_ds; 3953 csin = csin_ds; 3954 } 3955 3956 /* creates temporary l2gmap and IS for coarse indexes */ 3957 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3958 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3959 3960 /* creates temporary MATIS object for coarse matrix */ 3961 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3962 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3963 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3964 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3965 #if 0 3966 { 3967 PetscViewer viewer; 3968 char filename[256]; 3969 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3970 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3971 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3972 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3973 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3974 } 3975 #endif 3976 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3977 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3978 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3979 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3980 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3981 3982 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3983 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3984 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3985 const PetscInt *idxs; 3986 ISLocalToGlobalMapping tmap; 3987 3988 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3989 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3990 /* allocate space for temporary storage */ 3991 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3992 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3993 /* allocate for IS array */ 3994 nisdofs = pcbddc->n_ISForDofsLocal; 3995 nisneu = !!pcbddc->NeumannBoundariesLocal; 3996 nis = nisdofs + nisneu; 3997 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3998 /* dofs splitting */ 3999 for (i=0;i<nisdofs;i++) { 4000 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4001 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4002 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4003 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4004 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4005 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4006 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4007 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4008 } 4009 /* neumann boundaries */ 4010 if (pcbddc->NeumannBoundariesLocal) { 4011 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4012 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4013 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4014 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4015 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4016 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4017 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4018 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4019 } 4020 /* free memory */ 4021 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4022 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4023 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4024 } else { 4025 nis = 0; 4026 nisdofs = 0; 4027 nisneu = 0; 4028 isarray = NULL; 4029 } 4030 /* destroy no longer needed map */ 4031 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4032 4033 /* restrict on coarse candidates (if needed) */ 4034 coarse_mat_is = NULL; 4035 if (csin) { 4036 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4037 if (redist) { 4038 PetscMPIInt rank; 4039 PetscInt spc,n_spc_p1,dest[1],destsize; 4040 4041 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4042 spc = active_procs/ncoarse; 4043 n_spc_p1 = active_procs%ncoarse; 4044 if (im_active) { 4045 destsize = 1; 4046 if (rank > n_spc_p1*(spc+1)-1) { 4047 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4048 } else { 4049 dest[0] = rank/(spc+1); 4050 } 4051 } else { 4052 destsize = 0; 4053 } 4054 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4055 } else if (csin_type_simple) { 4056 PetscMPIInt rank; 4057 PetscInt issize,isidx; 4058 4059 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4060 if (im_active) { 4061 issize = 1; 4062 isidx = (PetscInt)rank; 4063 } else { 4064 issize = 0; 4065 isidx = -1; 4066 } 4067 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4068 } else { /* get a suitable subassembling pattern from MATIS code */ 4069 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4070 } 4071 4072 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4073 if (!redist || ncoarse <= void_procs) { 4074 PetscInt ncoarse_cand,tissize,*nisindices; 4075 PetscInt *coarse_candidates; 4076 const PetscInt* tisindices; 4077 4078 /* get coarse candidates' ranks in pc communicator */ 4079 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4080 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4081 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4082 if (!coarse_candidates[i]) { 4083 coarse_candidates[ncoarse_cand++]=i; 4084 } 4085 } 4086 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4087 4088 4089 if (pcbddc->dbg_flag) { 4090 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4091 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4092 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4093 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4094 for (i=0;i<ncoarse_cand;i++) { 4095 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4096 } 4097 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4098 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4099 } 4100 /* shift the pattern on coarse candidates */ 4101 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4102 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4103 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4104 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4105 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4106 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4107 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4108 } 4109 if (pcbddc->dbg_flag) { 4110 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4111 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4112 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4113 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4114 } 4115 } 4116 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4117 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4118 } else { 4119 if (pcbddc->dbg_flag) { 4120 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4121 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4122 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4123 } 4124 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4125 coarse_mat_is = t_coarse_mat_is; 4126 } 4127 4128 /* create local to global scatters for coarse problem */ 4129 if (compute_vecs) { 4130 PetscInt lrows; 4131 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4132 if (coarse_mat_is) { 4133 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4134 } else { 4135 lrows = 0; 4136 } 4137 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4138 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4139 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4140 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4141 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4142 } 4143 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4144 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4145 4146 /* set defaults for coarse KSP and PC */ 4147 if (multilevel_allowed) { 4148 coarse_ksp_type = KSPRICHARDSON; 4149 coarse_pc_type = PCBDDC; 4150 } else { 4151 coarse_ksp_type = KSPPREONLY; 4152 coarse_pc_type = PCREDUNDANT; 4153 } 4154 4155 /* print some info if requested */ 4156 if (pcbddc->dbg_flag) { 4157 if (!multilevel_allowed) { 4158 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4159 if (multilevel_requested) { 4160 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4161 } else if (pcbddc->max_levels) { 4162 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4163 } 4164 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4165 } 4166 } 4167 4168 /* create the coarse KSP object only once with defaults */ 4169 if (coarse_mat_is) { 4170 MatReuse coarse_mat_reuse; 4171 PetscViewer dbg_viewer = NULL; 4172 if (pcbddc->dbg_flag) { 4173 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4174 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4175 } 4176 if (!pcbddc->coarse_ksp) { 4177 char prefix[256],str_level[16]; 4178 size_t len; 4179 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4180 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4181 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4182 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4183 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4184 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4185 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4186 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4187 /* prefix */ 4188 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4189 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4190 if (!pcbddc->current_level) { 4191 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4192 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4193 } else { 4194 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4195 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4196 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4197 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4198 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4199 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4200 } 4201 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4202 /* allow user customization */ 4203 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4204 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4205 } 4206 4207 /* get some info after set from options */ 4208 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4209 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4210 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4211 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4212 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4213 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4214 isbddc = PETSC_FALSE; 4215 } 4216 if (isredundant) { 4217 KSP inner_ksp; 4218 PC inner_pc; 4219 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4220 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4221 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4222 } 4223 4224 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4225 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4226 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4227 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4228 if (nisdofs) { 4229 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4230 for (i=0;i<nisdofs;i++) { 4231 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4232 } 4233 } 4234 if (nisneu) { 4235 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4236 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4237 } 4238 4239 /* assemble coarse matrix */ 4240 if (coarse_reuse) { 4241 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4242 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4243 coarse_mat_reuse = MAT_REUSE_MATRIX; 4244 } else { 4245 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4246 } 4247 if (isbddc || isnn) { 4248 if (pcbddc->coarsening_ratio > 1) { 4249 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4250 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4251 if (pcbddc->dbg_flag) { 4252 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4253 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4254 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4255 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4256 } 4257 } 4258 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4259 } else { 4260 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4261 coarse_mat = coarse_mat_is; 4262 } 4263 } else { 4264 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4265 } 4266 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4267 4268 /* propagate symmetry info to coarse matrix */ 4269 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4270 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4271 4272 /* set operators */ 4273 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4274 if (pcbddc->dbg_flag) { 4275 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4276 } 4277 } else { /* processes non partecipating to coarse solver (if any) */ 4278 coarse_mat = 0; 4279 } 4280 ierr = PetscFree(isarray);CHKERRQ(ierr); 4281 #if 0 4282 { 4283 PetscViewer viewer; 4284 char filename[256]; 4285 sprintf(filename,"coarse_mat.m"); 4286 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4287 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4288 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4289 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4290 } 4291 #endif 4292 4293 /* Compute coarse null space (special handling by BDDC only) */ 4294 if (pcbddc->NullSpace) { 4295 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4296 } 4297 4298 if (pcbddc->coarse_ksp) { 4299 Vec crhs,csol; 4300 PetscBool ispreonly; 4301 if (CoarseNullSpace) { 4302 if (isbddc) { 4303 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4304 } else { 4305 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4306 } 4307 } 4308 /* setup coarse ksp */ 4309 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4310 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4311 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4312 /* hack */ 4313 if (!csol) { 4314 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4315 } 4316 if (!crhs) { 4317 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4318 } 4319 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4320 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4321 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4322 KSP check_ksp; 4323 KSPType check_ksp_type; 4324 PC check_pc; 4325 Vec check_vec,coarse_vec; 4326 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4327 PetscInt its; 4328 PetscBool compute_eigs; 4329 PetscReal *eigs_r,*eigs_c; 4330 PetscInt neigs; 4331 const char *prefix; 4332 4333 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4334 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4335 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4336 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4337 if (ispreonly) { 4338 check_ksp_type = KSPPREONLY; 4339 compute_eigs = PETSC_FALSE; 4340 } else { 4341 check_ksp_type = KSPGMRES; 4342 compute_eigs = PETSC_TRUE; 4343 } 4344 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4345 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4346 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4347 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4348 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4349 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4350 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4351 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4352 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4353 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4354 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4355 /* create random vec */ 4356 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4357 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4358 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4359 if (CoarseNullSpace) { 4360 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4361 } 4362 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4363 /* solve coarse problem */ 4364 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4365 if (CoarseNullSpace) { 4366 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4367 } 4368 /* set eigenvalue estimation if preonly has not been requested */ 4369 if (compute_eigs) { 4370 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4371 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4372 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4373 lambda_max = eigs_r[neigs-1]; 4374 lambda_min = eigs_r[0]; 4375 if (pcbddc->use_coarse_estimates) { 4376 if (lambda_max>lambda_min) { 4377 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4378 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4379 } 4380 } 4381 } 4382 4383 /* check coarse problem residual error */ 4384 if (pcbddc->dbg_flag) { 4385 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4386 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4387 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4388 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4389 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4390 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4391 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4392 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4393 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4394 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4395 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4396 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4397 if (compute_eigs) { 4398 PetscReal lambda_max_s,lambda_min_s; 4399 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4400 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4401 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4402 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4403 for (i=0;i<neigs;i++) { 4404 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4405 } 4406 } 4407 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4408 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4409 } 4410 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4411 if (compute_eigs) { 4412 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4413 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4414 } 4415 } 4416 } 4417 /* print additional info */ 4418 if (pcbddc->dbg_flag) { 4419 /* waits until all processes reaches this point */ 4420 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4421 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4422 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4423 } 4424 4425 /* free memory */ 4426 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4427 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4428 PetscFunctionReturn(0); 4429 } 4430 4431 #undef __FUNCT__ 4432 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4433 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4434 { 4435 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4436 PC_IS* pcis = (PC_IS*)pc->data; 4437 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4438 PetscInt i,coarse_size; 4439 PetscInt *local_primal_indices; 4440 PetscErrorCode ierr; 4441 4442 PetscFunctionBegin; 4443 /* Compute global number of coarse dofs */ 4444 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4445 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4446 } 4447 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4448 4449 /* check numbering */ 4450 if (pcbddc->dbg_flag) { 4451 PetscScalar coarsesum,*array; 4452 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4453 4454 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4455 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4456 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4457 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4458 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4459 for (i=0;i<pcbddc->local_primal_size;i++) { 4460 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4461 } 4462 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4463 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4464 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4465 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4466 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4467 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4468 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4469 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4470 for (i=0;i<pcis->n;i++) { 4471 if (array[i] == 1.0) { 4472 set_error = PETSC_TRUE; 4473 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4474 } 4475 } 4476 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4477 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4478 for (i=0;i<pcis->n;i++) { 4479 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4480 } 4481 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4482 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4483 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4484 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4485 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4486 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4487 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4488 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4489 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4490 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4491 for (i=0;i<pcbddc->local_primal_size;i++) { 4492 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4493 } 4494 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4495 } 4496 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4497 if (set_error_reduced) { 4498 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4499 } 4500 } 4501 /* get back data */ 4502 *coarse_size_n = coarse_size; 4503 *local_primal_indices_n = local_primal_indices; 4504 PetscFunctionReturn(0); 4505 } 4506 4507 #undef __FUNCT__ 4508 #define __FUNCT__ "PCBDDCGlobalToLocal" 4509 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4510 { 4511 IS localis_t; 4512 PetscInt i,lsize,*idxs,n; 4513 PetscScalar *vals; 4514 PetscErrorCode ierr; 4515 4516 PetscFunctionBegin; 4517 /* get indices in local ordering exploiting local to global map */ 4518 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4519 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4520 for (i=0;i<lsize;i++) vals[i] = 1.0; 4521 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4522 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4523 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4524 if (idxs) { /* multilevel guard */ 4525 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4526 } 4527 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4528 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4529 ierr = PetscFree(vals);CHKERRQ(ierr); 4530 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4531 /* now compute set in local ordering */ 4532 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4533 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4534 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4535 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4536 for (i=0,lsize=0;i<n;i++) { 4537 if (PetscRealPart(vals[i]) > 0.5) { 4538 lsize++; 4539 } 4540 } 4541 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4542 for (i=0,lsize=0;i<n;i++) { 4543 if (PetscRealPart(vals[i]) > 0.5) { 4544 idxs[lsize++] = i; 4545 } 4546 } 4547 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4548 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4549 *localis = localis_t; 4550 PetscFunctionReturn(0); 4551 } 4552 4553 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4554 #undef __FUNCT__ 4555 #define __FUNCT__ "PCBDDCMatMult_Private" 4556 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4557 { 4558 PCBDDCChange_ctx change_ctx; 4559 PetscErrorCode ierr; 4560 4561 PetscFunctionBegin; 4562 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4563 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4564 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4565 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4566 PetscFunctionReturn(0); 4567 } 4568 4569 #undef __FUNCT__ 4570 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4571 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4572 { 4573 PCBDDCChange_ctx change_ctx; 4574 PetscErrorCode ierr; 4575 4576 PetscFunctionBegin; 4577 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4578 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4579 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4580 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4581 PetscFunctionReturn(0); 4582 } 4583 4584 #undef __FUNCT__ 4585 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4586 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4587 { 4588 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4589 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4590 PetscInt *used_xadj,*used_adjncy; 4591 PetscBool free_used_adj; 4592 PetscErrorCode ierr; 4593 4594 PetscFunctionBegin; 4595 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4596 free_used_adj = PETSC_FALSE; 4597 if (pcbddc->sub_schurs_layers == -1) { 4598 used_xadj = NULL; 4599 used_adjncy = NULL; 4600 } else { 4601 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4602 used_xadj = pcbddc->mat_graph->xadj; 4603 used_adjncy = pcbddc->mat_graph->adjncy; 4604 } else if (pcbddc->computed_rowadj) { 4605 used_xadj = pcbddc->mat_graph->xadj; 4606 used_adjncy = pcbddc->mat_graph->adjncy; 4607 } else { 4608 PetscBool flg_row=PETSC_FALSE; 4609 const PetscInt *xadj,*adjncy; 4610 PetscInt nvtxs; 4611 4612 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4613 if (flg_row) { 4614 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4615 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4616 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4617 free_used_adj = PETSC_TRUE; 4618 } else { 4619 pcbddc->sub_schurs_layers = -1; 4620 used_xadj = NULL; 4621 used_adjncy = NULL; 4622 } 4623 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4624 } 4625 } 4626 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4627 4628 /* free adjacency */ 4629 if (free_used_adj) { 4630 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4631 } 4632 PetscFunctionReturn(0); 4633 } 4634 4635 #undef __FUNCT__ 4636 #define __FUNCT__ "PCBDDCInitSubSchurs" 4637 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4638 { 4639 PC_IS *pcis=(PC_IS*)pc->data; 4640 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4641 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4642 PCBDDCGraph graph; 4643 Mat S_j; 4644 PetscErrorCode ierr; 4645 4646 PetscFunctionBegin; 4647 /* attach interface graph for determining subsets */ 4648 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4649 IS verticesIS; 4650 4651 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4652 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4653 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4654 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4655 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4656 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4657 /* 4658 if (pcbddc->dbg_flag) { 4659 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4660 } 4661 */ 4662 } else { 4663 graph = pcbddc->mat_graph; 4664 } 4665 4666 /* Create Schur complement matrix */ 4667 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4668 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4669 4670 /* sub_schurs init */ 4671 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr); 4672 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4673 /* free graph struct */ 4674 if (pcbddc->sub_schurs_rebuild) { 4675 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4676 } 4677 PetscFunctionReturn(0); 4678 } 4679